diff --git a/.github/shadps4.png b/.github/shadps4.png index 86fa592d5..7ad301d4c 100644 Binary files a/.github/shadps4.png and b/.github/shadps4.png differ diff --git a/.github/workflows/linux-qt.yml b/.github/workflows/linux-qt.yml index 31f8df01e..5611ae50f 100644 --- a/.github/workflows/linux-qt.yml +++ b/.github/workflows/linux-qt.yml @@ -10,7 +10,6 @@ on: branches: [ "main" ] env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release jobs: @@ -19,8 +18,8 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Fetch submodules - run: git submodule update --init --recursive + with: + submodules: recursive - name: Install misc packages run: > diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7ebb33651..ef77a16c8 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -8,10 +8,8 @@ on: branches: [ "main" ] pull_request: branches: [ "main" ] - workflow_dispatch: env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release jobs: diff --git a/.github/workflows/macos-qt.yml b/.github/workflows/macos-qt.yml index e9a9aa4f3..4b3672dff 100644 --- a/.github/workflows/macos-qt.yml +++ b/.github/workflows/macos-qt.yml @@ -8,10 +8,8 @@ on: branches: [ "main" ] pull_request: branches: [ "main" ] - workflow_dispatch: env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release jobs: @@ -36,10 +34,11 @@ jobs: - name: Setup Qt uses: jurplel/install-qt-action@v4 with: + version: 6.7.2 host: mac target: desktop arch: clang_64 - version: 6.7.2 + archives: qtbase - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 910f0484e..e46401cb7 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -8,10 +8,8 @@ on: branches: [ "main" ] pull_request: branches: [ "main" ] - workflow_dispatch: env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release jobs: diff --git a/.github/workflows/windows-qt.yml b/.github/workflows/windows-qt.yml index 9610280bc..06a16eb5b 100644 --- a/.github/workflows/windows-qt.yml +++ b/.github/workflows/windows-qt.yml @@ -10,12 +10,8 @@ on: branches: [ "main" ] env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release -permissions: - contents: read - jobs: build: runs-on: windows-latest @@ -28,16 +24,16 @@ jobs: - name: Setup Qt uses: jurplel/install-qt-action@v4 with: - arch: win64_msvc2019_64 version: 6.7.2 + host: windows + target: desktop + arch: win64_msvc2019_64 + archives: qtbase - name: Configure CMake - # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. - # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL -DENABLE_QT_GUI=ON - name: Build - # Build your program with the given configuration run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel - name: Deploy diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 01ae3f9c3..46dc13a89 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -10,12 +10,8 @@ on: branches: [ "main" ] env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) BUILD_TYPE: Release -permissions: - contents: read - jobs: build: runs-on: windows-latest @@ -25,16 +21,14 @@ jobs: submodules: recursive - name: Configure CMake - # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. - # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL + - name: Build - # Build your program with the given configuration run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel - - name: Upload a Build Artifact + + - name: Upload executable uses: actions/upload-artifact@v4 with: name: shadps4-win64 - # A file, directory or wildcard pattern that describes what to upload path: | ${{github.workspace}}/build/Release/shadPS4.exe diff --git a/.gitignore b/.gitignore index fcf7634f4..2a3145085 100644 --- a/.gitignore +++ b/.gitignore @@ -408,3 +408,4 @@ FodyWeavers.xsd /emulator/eboot.bin /out/* /third-party/out/* +/src/common/scm_rev.cpp diff --git a/.gitmodules b/.gitmodules index 3a9d8f42f..60fb5fbbf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "externals/date"] path = externals/date url = https://github.com/HowardHinnant/date.git +[submodule "externals/ffmpeg-core"] + path = externals/ffmpeg-core + url = https://github.com/shadps4-emu/ext-ffmpeg-core diff --git a/.reuse/dep5 b/.reuse/dep5 index 1dad50148..a80001f84 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -14,8 +14,6 @@ Files: CMakeSettings.json documents/Screenshots/Sonic Mania.png documents/Screenshots/Undertale.png documents/Screenshots/We are DOOMED.png - externals/stb_image.h - externals/tracy/* scripts/ps4_names.txt src/images/controller_icon.png src/images/exit_icon.png @@ -36,9 +34,26 @@ Files: CMakeSettings.json src/images/refresh_icon.png src/images/settings_icon.png src/images/stop_icon.png + src/images/shadPS4.icns src/images/shadps4.ico src/images/themes_icon.png src/shadps4.qrc src/shadps4.rc Copyright: shadPS4 Emulator Project License: GPL-2.0-or-later + +Files: externals/cmake-modules/* +Copyright: 2009-2010 Iowa State University +License: BSL-1.0 + +Files: externals/renderdoc/* +Copyright: 2019-2024 Baldur Karlsson +License: MIT + +Files: externals/stb_image.h +Copyright: 2017 Sean Barrett +License: MIT + +Files: externals/tracy/* +Copyright: 2017-2024 Bartosz Taudul +License: BSD-3-Clause diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ec7cd54b..ff03befc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,12 +19,11 @@ project(shadPS4) option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) -# This function should be passed a list of all files in a target. It will automatically generate -# file groups following the directory hierarchy, so that the layout of the files in IDEs matches the -# one in the filesystem. +# This function should be passed a list of all files in a target. It will automatically generate file groups +# following the directory hierarchy, so that the layout of the files in IDEs matches the one in the filesystem. function(create_target_directory_groups target_name) - # Place any files that aren't in the source list in a separate group so that they don't get in - # the way. + + # Place any files that aren't in the source list in a separate group so that they don't get in the way. source_group("Other Files" REGULAR_EXPRESSION ".") get_target_property(target_sources "${target_name}" SOURCES) @@ -39,14 +38,6 @@ endfunction() # Setup a custom clang-format target (if clang-format can be found) that will run # against all the src files. This should be used before making a pull request. -# ======================================================================= - -set(CLANG_FORMAT_POSTFIX "-17") -find_program(CLANG_FORMAT - NAMES clang-format${CLANG_FORMAT_POSTFIX} - clang-format - PATHS ${PROJECT_BINARY_DIR}/externals) - if (CLANG_FORMAT) set(SRCS ${PROJECT_SOURCE_DIR}/src) set(CCOMMENT "Running clang format against all the .h and .cpp files in src/") @@ -65,6 +56,15 @@ endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +# generate git revision information +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules/") +include(GetGitRevisionDescription) +get_git_head_revision(GIT_REF_SPEC GIT_REV) +git_describe(GIT_DESC --always --long --dirty) +git_branch_name(GIT_BRANCH) + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/common/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/src/common/scm_rev.cpp" @ONLY) + find_package(Boost 1.84.0 CONFIG) find_package(cryptopp 8.9.0 MODULE) find_package(fmt 10.2.1 CONFIG) @@ -79,17 +79,31 @@ find_package(xbyak 7.07 CONFIG) find_package(xxHash 0.8.2 MODULE) find_package(zlib-ng 2.2.0 MODULE) find_package(Zydis 4.1.0 CONFIG) +find_package(RenderDoc MODULE) if (APPLE) find_package(date 3.0.1 CONFIG) endif() +# Note: Windows always has these functions through winpthreads include(CheckSymbolExists) check_symbol_exists(pthread_mutex_timedlock "pthread.h" HAVE_PTHREAD_MUTEX_TIMEDLOCK) -# Windows always has the function through winpthreads if(HAVE_PTHREAD_MUTEX_TIMEDLOCK OR WIN32) add_compile_options(-DHAVE_PTHREAD_MUTEX_TIMEDLOCK) endif() +check_symbol_exists(sem_timedwait "semaphore.h" HAVE_SEM_TIMEDWAIT) +if(HAVE_SEM_TIMEDWAIT OR WIN32) + add_compile_options(-DHAVE_SEM_TIMEDWAIT) +endif() + +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") + # libc++ requires -fexperimental-library to enable std::jthread and std::stop_token support. + include(CheckCXXSymbolExists) + check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) + if(LIBCPP) + add_compile_options(-fexperimental-library) + endif() +endif() add_subdirectory(externals) include_directories(src) @@ -98,9 +112,11 @@ if(ENABLE_QT_GUI) find_package(Qt6 REQUIRED COMPONENTS Widgets Concurrent) qt_standard_project_setup() set(CMAKE_AUTORCC ON) + set(CMAKE_AUTOMOC ON) + set(CMAKE_AUTOUIC ON) endif() -set(AUDIO_CORE src/audio_core/sdl_audio.cpp +set(AUDIO_CORE src/audio_core/sdl_audio.cpp src/audio_core/sdl_audio.h ) @@ -177,6 +193,16 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp src/core/libraries/disc_map/disc_map.cpp src/core/libraries/disc_map/disc_map.h src/core/libraries/disc_map/disc_map_codes.h + src/core/libraries/avplayer/avplayer_common.cpp + src/core/libraries/avplayer/avplayer_common.h + src/core/libraries/avplayer/avplayer_file_streamer.cpp + src/core/libraries/avplayer/avplayer_file_streamer.h + src/core/libraries/avplayer/avplayer_impl.cpp + src/core/libraries/avplayer/avplayer_impl.h + src/core/libraries/avplayer/avplayer_source.cpp + src/core/libraries/avplayer/avplayer_source.h + src/core/libraries/avplayer/avplayer_state.cpp + src/core/libraries/avplayer/avplayer_state.h src/core/libraries/avplayer/avplayer.cpp src/core/libraries/avplayer/avplayer.h ) @@ -226,6 +252,10 @@ set(PLAYGO_LIB src/core/libraries/playgo/playgo.cpp src/core/libraries/playgo/playgo_types.h ) +set(RANDOM_LIB src/core/libraries/random/random.cpp + src/core/libraries/random/random.h +) + set(USBD_LIB src/core/libraries/usbd/usbd.cpp src/core/libraries/usbd/usbd.h ) @@ -274,6 +304,7 @@ set(COMMON src/common/logging/backend.cpp src/common/native_clock.h src/common/path_util.cpp src/common/path_util.h + src/common/object_pool.h src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h @@ -285,9 +316,12 @@ set(COMMON src/common/logging/backend.cpp src/common/thread.h src/common/types.h src/common/uint128.h + src/common/unique_function.h src/common/version.h src/common/ntapi.h src/common/ntapi.cpp + src/common/scm_rev.cpp + src/common/scm_rev.h ) set(CORE src/core/aerolib/stubs.cpp @@ -308,6 +342,8 @@ set(CORE src/core/aerolib/stubs.cpp src/core/file_format/pkg_type.h src/core/file_format/psf.cpp src/core/file_format/psf.h + src/core/file_format/playgo_chunk.cpp + src/core/file_format/playgo_chunk.h src/core/file_format/trp.cpp src/core/file_format/trp.h src/core/file_format/splash.h @@ -336,6 +372,7 @@ set(CORE src/core/aerolib/stubs.cpp ${NP_LIBS} ${PNG_LIB} ${PLAYGO_LIB} + ${RANDOM_LIB} ${USBD_LIB} ${MISC_LIBS} ${DIALOGS_LIB} @@ -353,7 +390,6 @@ set(CORE src/core/aerolib/stubs.cpp ) set(SHADER_RECOMPILER src/shader_recompiler/exception.h - src/shader_recompiler/object_pool.h src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h @@ -404,6 +440,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h + src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -437,6 +474,13 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/resource.h + src/video_core/buffer_cache/buffer.cpp + src/video_core/buffer_cache/buffer.h + src/video_core/buffer_cache/buffer_cache.cpp + src/video_core/buffer_cache/buffer_cache.h + src/video_core/buffer_cache/memory_tracker_base.h + src/video_core/buffer_cache/range_set.h + src/video_core/buffer_cache/word_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -465,8 +509,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_scheduler.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h - src/video_core/renderer_vulkan/vk_stream_buffer.cpp - src/video_core/renderer_vulkan/vk_stream_buffer.h src/video_core/renderer_vulkan/vk_swapchain.cpp src/video_core/renderer_vulkan/vk_swapchain.h src/video_core/texture_cache/image.cpp @@ -482,6 +524,11 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/types.h + src/video_core/page_manager.cpp + src/video_core/page_manager.h + src/video_core/multi_level_page_table.h + src/video_core/renderdoc.cpp + src/video_core/renderdoc.h ) set(INPUT src/input/controller.cpp @@ -494,37 +541,39 @@ set(EMULATOR src/emulator.cpp src/sdl_window.cpp ) -# the above is shared in sdl and qt version (TODO share them all) +# The above is shared in SDL and Qt version (TODO share them all) if(ENABLE_QT_GUI) qt_add_resources(RESOURCE_FILES src/shadps4.qrc) -set(QT_GUI - src/qt_gui/main_window_ui.h - src/qt_gui/main_window.cpp - src/qt_gui/main_window.h - src/qt_gui/gui_context_menus.h - src/qt_gui/game_list_utils.h - src/qt_gui/game_info.cpp - src/qt_gui/game_info.h - src/qt_gui/game_list_frame.cpp - src/qt_gui/game_list_frame.h - src/qt_gui/game_grid_frame.cpp - src/qt_gui/game_grid_frame.h - src/qt_gui/game_install_dialog.cpp - src/qt_gui/game_install_dialog.h - src/qt_gui/pkg_viewer.cpp - src/qt_gui/pkg_viewer.h - src/qt_gui/trophy_viewer.cpp - src/qt_gui/trophy_viewer.h - src/qt_gui/elf_viewer.cpp - src/qt_gui/elf_viewer.h - src/qt_gui/main_window_themes.cpp - src/qt_gui/main_window_themes.h - src/qt_gui/main.cpp - ${EMULATOR} - ${RESOURCE_FILES} - ) +set(QT_GUI src/qt_gui/main_window_ui.h + src/qt_gui/main_window.cpp + src/qt_gui/main_window.h + src/qt_gui/gui_context_menus.h + src/qt_gui/game_list_utils.h + src/qt_gui/game_info.cpp + src/qt_gui/game_info.h + src/qt_gui/game_list_frame.cpp + src/qt_gui/game_list_frame.h + src/qt_gui/game_grid_frame.cpp + src/qt_gui/game_grid_frame.h + src/qt_gui/game_install_dialog.cpp + src/qt_gui/game_install_dialog.h + src/qt_gui/pkg_viewer.cpp + src/qt_gui/pkg_viewer.h + src/qt_gui/trophy_viewer.cpp + src/qt_gui/trophy_viewer.h + src/qt_gui/elf_viewer.cpp + src/qt_gui/elf_viewer.h + src/qt_gui/main_window_themes.cpp + src/qt_gui/main_window_themes.h + src/qt_gui/settings_dialog.cpp + src/qt_gui/settings_dialog.h + src/qt_gui/settings_dialog.ui + src/qt_gui/main.cpp + ${EMULATOR} + ${RESOURCE_FILES} +) endif() if (ENABLE_QT_GUI) @@ -537,6 +586,7 @@ if (ENABLE_QT_GUI) ${SHADER_RECOMPILER} ${VIDEO_CORE} ${EMULATOR} + src/images/shadPS4.icns ) else() add_executable(shadps4 @@ -557,7 +607,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API ffmpeg) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) if (APPLE) @@ -623,6 +673,10 @@ target_include_directories(shadps4 PRIVATE ${HOST_SHADERS_INCLUDE}) if (ENABLE_QT_GUI) set_target_properties(shadps4 PROPERTIES -# WIN32_EXECUTABLE ON - MACOSX_BUNDLE ON) +# WIN32_EXECUTABLE ON + MACOSX_BUNDLE ON + MACOSX_BUNDLE_ICON_FILE shadPS4.icns) + + set_source_files_properties(src/images/shadPS4.icns PROPERTIES + MACOSX_PACKAGE_LOCATION Resources) endif() diff --git a/LICENSES/BSL-1.0.txt b/LICENSES/BSL-1.0.txt new file mode 100644 index 000000000..2d87ab1a9 --- /dev/null +++ b/LICENSES/BSL-1.0.txt @@ -0,0 +1,7 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index cf7d9f9b7..7089b3c01 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ SPDX-License-Identifier: GPL-2.0-or-later # shadPS4 -shadPS4 is an early PS4 emulator for Windows and Linux written in C++ +shadPS4 is an early **PlayStation 4** emulator for **Windows**, **Linux** and **macOS** written in C++ If you encounter problems or have doubts, do not hesitate to look at the [**Quickstart**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/Quickstart/Quickstart.md). @@ -42,13 +42,13 @@ To verify that a game works, you can look at [**shadPS4 Game Compatibility**](ht To discuss shadPS4 development or suggest ideas, join the [**Discord server**](https://discord.gg/MyZRaBngxA). -Check us on [**X (twitter)**](https://x.com/shadps4) or on our [**website**](https://shadps4.net/). +To get the latest news, go to our [**X (twitter)**](https://x.com/shadps4) or our [**website**](https://shadps4.net/). # Status In development, small games are working like [**Sonic Mania**](https://www.youtube.com/watch?v=AAHoNzhHyCU), [**Undertale**](https://youtu.be/5zIvdy65Ro4), [**Dysmantle**](https://youtu.be/b9xzhLBdESE) and others... -# Why? +# Why The project started as a fun project. Due to limited free time, it will probably take a while before shadPS4 is able to run anything decent, but we're trying to make small, regular commits. @@ -64,20 +64,37 @@ Check the build instructions for [**Linux**](https://github.com/shadps4-emu/shad ## Build status -|Windows|Build status| -|--------|------------| +
+Windows + +| Windows | Build status | +|--------|--------| |Windows SDL Build|[![Windows-sdl](https://github.com/shadps4-emu/shadPS4/actions/workflows/windows.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/windows.yml) |Windows Qt Build|[![Windows-qt](https://github.com/shadps4-emu/shadPS4/actions/workflows/windows-qt.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/windows-qt.yml) +
-|Linux|Build status| -|--------|------------| +
+Linux + +| Linux | Build status | +|--------|--------| |Linux SDL Build|[![Linux-sdl](https://github.com/shadps4-emu/shadPS4/actions/workflows/linux.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/linux.yml) |Linux Qt Build|[![Linux-qt](https://github.com/shadps4-emu/shadPS4/actions/workflows/linux-qt.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/linux-qt.yml) +
+ +
+macOS + +| macOS | Build status | +|--------|--------| +|macOS SDL Build|[![macOS-sdl](https://github.com/shadps4-emu/shadPS4/actions/workflows/macos.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/macos.yml) +|macOS Qt Build|[![macOS-qt](https://github.com/shadps4-emu/shadPS4/actions/workflows/macos-qt.yml/badge.svg)](https://github.com/shadps4-emu/shadPS4/actions/workflows/macos-qt.yml) +
# Keyboard Mapping | Controller button | Keyboard | -| ------------- | ------------- | +|-------------|-------------| LEFT AXIS UP | W | LEFT AXIS DOWN | S | LEFT AXIS LEFT | A | @@ -123,7 +140,7 @@ Open a PR and we'll check it :) # Contributors - + # Sister Projects diff --git a/documents/Quickstart/Quickstart.md b/documents/Quickstart/Quickstart.md index 29c7ba499..4c51b288a 100644 --- a/documents/Quickstart/Quickstart.md +++ b/documents/Quickstart/Quickstart.md @@ -37,13 +37,18 @@ SPDX-License-Identifier: GPL-2.0-or-later - Windows 10 or Ubuntu 22.04 -## Have the latest WIP version +## How to run the latest Work-in-Progress builds of ShadPS4 -When you go to Github Release, you have the latest major versions (e.g. v0.0.3), but if you want to have the latest Work-In-Progress version, you can go to Actions on Github to download it (Please note a Github account is required to be able to download). +1. Go to and make sure you are logged into your GitHub account (important!) +2. On the left side of the page, select your operating system of choice (the "**qt**" versions have a user interface, which is probably the one you want. The others are SDL versions, which can only be run via command line). ![image](https://github.com/user-attachments/assets/43f01bbf-236c-4d6d-98ac-f5a5badd4ce8) - +3. In the workflow list, select the latest entry with a green :white_check_mark: icon in front of it. (or the latest entry for whatever pull request you wish to test). ![image](https://github.com/user-attachments/assets/6365f407-867c-44ae-bf00-944f8d84a349) -After downloading the version suitable for you (Windows or Linux), you must unzip the file and then you can run it. Please note, there are two versions for each platform, a Qt version with user interface and one without (SDL Builds). +4. On the bottom of this page, select the name of the file, and it should start downloading. (If there is no file here, double check that you are indeed logged into a GitHub account, and that there is a green :white_check_mark: icon. ![image](https://github.com/user-attachments/assets/97924500-3911-4f90-ab63-ffae7e52700b) + +5. Once downloaded, extract to its own folder, and run ShadPS4's executable from the extracted folder. + +6. Upon first launch, ShadPS4 will prompt you to select a folder to store your installed games in. Select "Browse" and then select a folder that ShadPS4 can use to install your PKG files to. ## Install PKG files diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 7fca7b546..bc313232d 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -47,6 +47,11 @@ else() endif() endif() +if (NOT TARGET ffmpeg) + set(ARCHITECTURE "x86_64") + add_subdirectory(ffmpeg-core) +endif() + # Zlib-Ng if (NOT TARGET zlib-ng::zlib) set(ZLIB_ENABLE_TESTS OFF) @@ -74,6 +79,13 @@ if (NOT TARGET GPUOpen::VulkanMemoryAllocator) add_subdirectory(vma) endif() +# RenderDoc +if (NOT TARGET RenderDoc::API) + add_library(renderdoc INTERFACE) + target_include_directories(renderdoc SYSTEM INTERFACE ./renderdoc) + add_library(RenderDoc::API ALIAS renderdoc) +endif() + # glslang if (NOT TARGET glslang::glslang) set(SKIP_GLSLANG_INSTALL ON CACHE BOOL "") diff --git a/externals/cmake-modules/GetGitRevisionDescription.cmake b/externals/cmake-modules/GetGitRevisionDescription.cmake new file mode 100644 index 000000000..087f5deea --- /dev/null +++ b/externals/cmake-modules/GetGitRevisionDescription.cmake @@ -0,0 +1,158 @@ +# - Returns a version string from Git +# +# These functions force a re-configure on each git commit so that you can +# trust the values of the variables in your build system. +# +# get_git_head_revision( [ ...]) +# +# Returns the refspec and sha hash of the current head revision +# +# git_describe( [ ...]) +# +# Returns the results of git describe on the source tree, and adjusting +# the output so that it tests false if an error occurs. +# +# git_get_exact_tag( [ ...]) +# +# Returns the results of git describe --exact-match on the source tree, +# and adjusting the output so that it tests false if there was no exact +# matching tag. +# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2010 Ryan Pavlik +# http://academic.cleardefinition.com +# Iowa State University HCI Graduate Program/VRAC +# +# Copyright Iowa State University 2009-2010. +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +if(__get_git_revision_description) + return() +endif() +set(__get_git_revision_description YES) + +# We must run the following at "include" time, not at function call time, +# to find the path to this module rather than the path to a calling list file +get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH) + +function(get_git_head_revision _refspecvar _hashvar) + set(GIT_PARENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + set(GIT_DIR "${GIT_PARENT_DIR}/.git") + while(NOT EXISTS "${GIT_DIR}") # .git dir not found, search parent directories + set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}") + get_filename_component(GIT_PARENT_DIR ${GIT_PARENT_DIR} PATH) + if(GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT) + # We have reached the root directory, we are not in git + set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE) + set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE) + return() + endif() + set(GIT_DIR "${GIT_PARENT_DIR}/.git") + endwhile() + # check if this is a submodule + if(NOT IS_DIRECTORY ${GIT_DIR}) + file(READ ${GIT_DIR} submodule) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" GIT_DIR_RELATIVE ${submodule}) + get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH) + get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} ABSOLUTE) + endif() + set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data") + if(NOT EXISTS "${GIT_DATA}") + file(MAKE_DIRECTORY "${GIT_DATA}") + endif() + + if(NOT EXISTS "${GIT_DIR}/HEAD") + return() + endif() + set(HEAD_FILE "${GIT_DATA}/HEAD") + configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY) + + configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in" + "${GIT_DATA}/grabRef.cmake" + @ONLY) + include("${GIT_DATA}/grabRef.cmake") + + set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE) + set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE) +endfunction() + +function(git_branch_name _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + + if(NOT GIT_FOUND) + set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) + return() + endif() + + execute_process(COMMAND + "${GIT_EXECUTABLE}" + rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY + "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE + res + OUTPUT_VARIABLE + out + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT res EQUAL 0) + set(out "${out}-${res}-NOTFOUND") + endif() + + set(${_var} "${out}" PARENT_SCOPE) +endfunction() + +function(git_describe _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + #get_git_head_revision(refspec hash) + if(NOT GIT_FOUND) + set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) + return() + endif() + #if(NOT hash) + # set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) + # return() + #endif() + + # TODO sanitize + #if((${ARGN}" MATCHES "&&") OR + # (ARGN MATCHES "||") OR + # (ARGN MATCHES "\\;")) + # message("Please report the following error to the project!") + # message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}") + #endif() + + #message(STATUS "Arguments to execute_process: ${ARGN}") + + execute_process(COMMAND + "${GIT_EXECUTABLE}" + describe + ${hash} + ${ARGN} + WORKING_DIRECTORY + "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE + res + OUTPUT_VARIABLE + out + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT res EQUAL 0) + set(out "${out}-${res}-NOTFOUND") + endif() + + set(${_var} "${out}" PARENT_SCOPE) +endfunction() + +function(git_get_exact_tag _var) + git_describe(out --exact-match ${ARGN}) + set(${_var} "${out}" PARENT_SCOPE) +endfunction() diff --git a/externals/cmake-modules/GetGitRevisionDescription.cmake.in b/externals/cmake-modules/GetGitRevisionDescription.cmake.in new file mode 100644 index 000000000..0d7eb3c26 --- /dev/null +++ b/externals/cmake-modules/GetGitRevisionDescription.cmake.in @@ -0,0 +1,42 @@ +# +# Internal file for GetGitRevisionDescription.cmake +# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2010 Ryan Pavlik +# http://academic.cleardefinition.com +# Iowa State University HCI Graduate Program/VRAC +# +# Copyright Iowa State University 2009-2010. +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +set(HEAD_HASH) + +file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) + +string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) +if(HEAD_CONTENTS MATCHES "ref") + # named branch + string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") + if(EXISTS "@GIT_DIR@/${HEAD_REF}") + configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) + elseif(EXISTS "@GIT_DIR@/logs/${HEAD_REF}") + configure_file("@GIT_DIR@/logs/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) + set(HEAD_HASH "${HEAD_REF}") + endif() +else() + # detached HEAD + configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) +endif() + +if(NOT HEAD_HASH) + if(EXISTS "@GIT_DATA@/head-ref") + file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) + string(STRIP "${HEAD_HASH}" HEAD_HASH) + else() + set(HEAD_HASH "Unknown") + endif() +endif() diff --git a/externals/ext-boost b/externals/ext-boost index 147b2de77..a04136add 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7 +Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53 diff --git a/externals/ffmpeg-core b/externals/ffmpeg-core new file mode 160000 index 000000000..e30b7d7fe --- /dev/null +++ b/externals/ffmpeg-core @@ -0,0 +1 @@ +Subproject commit e30b7d7fe228bfb3f6e41ce1040b44a15eb7d5e0 diff --git a/externals/fmt b/externals/fmt index bc8d32e96..c98518351 160000 --- a/externals/fmt +++ b/externals/fmt @@ -1 +1 @@ -Subproject commit bc8d32e9643d2be5fc070abf2a50bc021544545d +Subproject commit c98518351efd5a46f5d448e947e0b7242d197d07 diff --git a/externals/glslang b/externals/glslang index 52f68dc6b..7c4d91e78 160000 --- a/externals/glslang +++ b/externals/glslang @@ -1 +1 @@ -Subproject commit 52f68dc6b2a9d017b43161f31f13a6f44636ee7c +Subproject commit 7c4d91e7819a1d27213aa3499953d54ae1a00e8f diff --git a/externals/magic_enum b/externals/magic_enum index 664ee62c1..dae6bbf16 160000 --- a/externals/magic_enum +++ b/externals/magic_enum @@ -1 +1 @@ -Subproject commit 664ee62c12570948b0e025d15b42d641fba8d54a +Subproject commit dae6bbf16c363e9ead4e628a47fdb02956a634f3 diff --git a/externals/renderdoc/renderdoc_app.h b/externals/renderdoc/renderdoc_app.h new file mode 100644 index 000000000..c01e05932 --- /dev/null +++ b/externals/renderdoc/renderdoc_app.h @@ -0,0 +1,741 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) || defined(__FreeBSD__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_value +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { \ + 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { \ + 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// truncated version when only a uint64_t is available (e.g. Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption +{ + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. + // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. + // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. +// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton +{ + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, + + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + +typedef enum RENDERDOC_OverlayBits +{ + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | + eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. +typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. +// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFileTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. +// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, + uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to. If set to NULL or "" +// the most recent capture file created made will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, + const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a captures to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, + const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. +#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/oror windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. +// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discard any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version +{ + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. This is the first API version in the v1.0 +// branch. +// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. +// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() + +typedef struct RENDERDOC_API_1_6_0 +{ + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. + // These unions allow old code to continue compiling without changes + union + { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. + // These unions allow old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union + { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. + // This union allows old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/externals/sdl3 b/externals/sdl3 index f9a06c20e..4cc3410dc 160000 --- a/externals/sdl3 +++ b/externals/sdl3 @@ -1 +1 @@ -Subproject commit f9a06c20ed85fb1d6754fc2280d6183382217910 +Subproject commit 4cc3410dce50cefce98d3cf3cf1bc8eca83b862a diff --git a/externals/toml11 b/externals/toml11 index b389bbc4e..fcb1d3d7e 160000 --- a/externals/toml11 +++ b/externals/toml11 @@ -1 +1 @@ -Subproject commit b389bbc4ebf90fa2fe7651de3046fb19f661ba3c +Subproject commit fcb1d3d7e5885edfadbbe9572991dc4b3248af58 diff --git a/externals/vulkan-headers b/externals/vulkan-headers index b379292b2..595c8d479 160000 --- a/externals/vulkan-headers +++ b/externals/vulkan-headers @@ -1 +1 @@ -Subproject commit b379292b2ab6df5771ba9870d53cf8b2c9295daf +Subproject commit 595c8d4794410a4e64b98dc58d27c0310d7ea2fd diff --git a/externals/xxhash b/externals/xxhash index a57f6cce2..ee65ff988 160000 --- a/externals/xxhash +++ b/externals/xxhash @@ -1 +1 @@ -Subproject commit a57f6cce2698049863af8c25787084ae0489d849 +Subproject commit ee65ff988bab34a184c700e2fbe1e1c5bc27485d diff --git a/src/audio_core/sdl_audio.cpp b/src/audio_core/sdl_audio.cpp index 0d494707d..f544c52f9 100644 --- a/src/audio_core/sdl_audio.cpp +++ b/src/audio_core/sdl_audio.cpp @@ -1,19 +1,23 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "sdl_audio.h" + +#include "common/assert.h" +#include "core/libraries/error_codes.h" + #include #include #include -#include -#include -#include "sdl_audio.h" + +#include // std::unique_lock namespace Audio { int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, Libraries::AudioOut::OrbisAudioOutParamFormat format) { using Libraries::AudioOut::OrbisAudioOutParamFormat; - std::scoped_lock lock{m_mutex}; + std::unique_lock lock{m_mutex}; for (int id = 0; id < portsOut.size(); id++) { auto& port = portsOut[id]; if (!port.isOpen) { @@ -88,7 +92,7 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, } s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; if (!port.isOpen) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; @@ -100,7 +104,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { int result = SDL_PutAudioStreamData(port.stream, ptr, port.samples_num * port.sample_size * port.channels_num); // TODO find a correct value 8192 is estimated - while (SDL_GetAudioStreamAvailable(port.stream) > 8192) { + while (SDL_GetAudioStreamAvailable(port.stream) > 65536) { SDL_Delay(0); } @@ -109,7 +113,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { using Libraries::AudioOut::OrbisAudioOutParamFormat; - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; if (!port.isOpen) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; @@ -147,7 +151,7 @@ bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { } bool SDLAudio::AudioOutGetStatus(s32 handle, int* type, int* channels_num) { - std::scoped_lock lock{m_mutex}; + std::shared_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; *type = port.type; *channels_num = port.channels_num; diff --git a/src/audio_core/sdl_audio.h b/src/audio_core/sdl_audio.h index d20c44559..7844bd61b 100644 --- a/src/audio_core/sdl_audio.h +++ b/src/audio_core/sdl_audio.h @@ -3,7 +3,7 @@ #pragma once -#include +#include #include #include "core/libraries/audio/audioout.h" @@ -32,7 +32,7 @@ private: int volume[8] = {}; SDL_AudioStream* stream = nullptr; }; - std::mutex m_mutex; + std::shared_mutex m_mutex; std::array portsOut; // main up to 8 ports , BGM 1 port , voice up to 4 ports , // personal up to 4 ports , padspk up to 5 ports , aux 1 port }; diff --git a/src/common/config.cpp b/src/common/config.cpp index a577b143a..24db6b039 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -15,15 +15,20 @@ static u32 screenWidth = 1280; static u32 screenHeight = 720; static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select static std::string logFilter; -static std::string logType = "sync"; +static std::string logType = "async"; +static std::string userName = "shadPS4"; static bool isDebugDump = false; static bool isLibc = true; static bool isShowSplash = false; static bool isNullGpu = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; +static u32 vblankDivider = 1; static bool vkValidation = false; static bool vkValidationSync = false; +static bool vkValidationGpu = false; +static bool rdocEnable = false; +static bool rdocMarkersEnable = false; // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -41,6 +46,8 @@ u32 m_window_size_H = 720; std::vector m_pkg_viewer; std::vector m_elf_viewer; std::vector m_recent_files; +// Settings +u32 m_language = 1; // english bool isLleLibc() { return isLibc; @@ -74,6 +81,10 @@ std::string getLogType() { return logType; } +std::string getUserName() { + return userName; +} + bool debugDump() { return isDebugDump; } @@ -94,6 +105,18 @@ bool dumpPM4() { return shouldDumpPM4; } +bool isRdocEnabled() { + return rdocEnable; +} + +bool isMarkersEnabled() { + return rdocMarkersEnable; +} + +u32 vblankDiv() { + return vblankDivider; +} + bool vkValidationEnabled() { return vkValidation; } @@ -102,6 +125,82 @@ bool vkValidationSyncEnabled() { return vkValidationSync; } +bool vkValidationGpuEnabled() { + return vkValidationGpu; +} + +void setGpuId(s32 selectedGpuId) { + gpuId = selectedGpuId; +} + +void setScreenWidth(u32 width) { + screenWidth = width; +} + +void setScreenHeight(u32 height) { + screenHeight = height; +} + +void setDebugDump(bool enable) { + isDebugDump = enable; +} + +void setShowSplash(bool enable) { + isShowSplash = enable; +} + +void setNullGpu(bool enable) { + isNullGpu = enable; +} + +void setDumpShaders(bool enable) { + shouldDumpShaders = enable; +} + +void setDumpPM4(bool enable) { + shouldDumpPM4 = enable; +} + +void setVkValidation(bool enable) { + vkValidation = enable; +} + +void setVkSyncValidation(bool enable) { + vkValidationSync = enable; +} + +void setRdocEnabled(bool enable) { + rdocEnable = enable; +} + +void setVblankDiv(u32 value) { + vblankDivider = value; +} + +void setFullscreenMode(bool enable) { + isFullscreen = enable; +} + +void setLanguage(u32 language) { + m_language = language; +} + +void setNeoMode(bool enable) { + isNeo = enable; +} + +void setLogType(std::string type) { + logType = type; +} + +void setLogFilter(std::string type) { + logFilter = type; +} + +void setUserName(std::string type) { + userName = type; +} + void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -197,6 +296,9 @@ std::vector getRecentFiles() { return m_recent_files; } +u32 GetLanguage() { + return m_language; +} void load(const std::filesystem::path& path) { // If the configuration file does not exist, create it and return std::error_code error; @@ -213,88 +315,85 @@ void load(const std::filesystem::path& path) { fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what()); return; } - if (data.contains("General")) { - auto generalResult = toml::expect(data.at("General")); - if (generalResult.is_ok()) { - auto general = generalResult.unwrap(); + const toml::value& general = data.at("General"); - isNeo = toml::find_or(general, "isPS4Pro", false); - isFullscreen = toml::find_or(general, "Fullscreen", true); - logFilter = toml::find_or(general, "logFilter", ""); - logType = toml::find_or(general, "logType", "sync"); - isShowSplash = toml::find_or(general, "showSplash", true); - } + isNeo = toml::find_or(general, "isPS4Pro", false); + isFullscreen = toml::find_or(general, "Fullscreen", false); + logFilter = toml::find_or(general, "logFilter", ""); + logType = toml::find_or(general, "logType", "sync"); + userName = toml::find_or(general, "userName", "shadPS4"); + isShowSplash = toml::find_or(general, "showSplash", true); } + if (data.contains("GPU")) { - auto gpuResult = toml::expect(data.at("GPU")); - if (gpuResult.is_ok()) { - auto gpu = gpuResult.unwrap(); + const toml::value& gpu = data.at("GPU"); - screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); - screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); - gpuId = toml::find_or(gpu, "gpuId", 0); - isNullGpu = toml::find_or(gpu, "nullGpu", false); - shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); - shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); - } + screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); + screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); + isNullGpu = toml::find_or(gpu, "nullGpu", false); + shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); + shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); + vblankDivider = toml::find_or(gpu, "vblankDivider", 1); } + if (data.contains("Vulkan")) { - const auto vkResult = toml::expect(data.at("Vulkan")); - if (vkResult.is_ok()) { - auto vk = vkResult.unwrap(); + const toml::value& vk = data.at("Vulkan"); - vkValidation = toml::find_or(vk, "validation", true); - vkValidationSync = toml::find_or(vk, "validation_sync", true); - } + gpuId = toml::find_or(vk, "gpuId", -1); + vkValidation = toml::find_or(vk, "validation", false); + vkValidationSync = toml::find_or(vk, "validation_sync", false); + vkValidationGpu = toml::find_or(vk, "validation_gpu", true); + rdocEnable = toml::find_or(vk, "rdocEnable", false); + rdocMarkersEnable = toml::find_or(vk, "rdocMarkersEnable", false); } + if (data.contains("Debug")) { - auto debugResult = toml::expect(data.at("Debug")); - if (debugResult.is_ok()) { - auto debug = debugResult.unwrap(); + const toml::value& debug = data.at("Debug"); - isDebugDump = toml::find_or(debug, "DebugDump", false); - } + isDebugDump = toml::find_or(debug, "DebugDump", false); } + if (data.contains("LLE")) { - auto lleResult = toml::expect(data.at("LLE")); - if (lleResult.is_ok()) { - auto lle = lleResult.unwrap(); + const toml::value& lle = data.at("LLE"); - isLibc = toml::find_or(lle, "libc", true); - } + isLibc = toml::find_or(lle, "libc", true); } - if (data.contains("GUI")) { - auto guiResult = toml::expect(data.at("GUI")); - if (guiResult.is_ok()) { - auto gui = guiResult.unwrap(); - m_icon_size = toml::find_or(gui, "iconSize", 0); - m_icon_size_grid = toml::find_or(gui, "iconSizeGrid", 0); - m_slider_pos = toml::find_or(gui, "sliderPos", 0); - m_slider_pos_grid = toml::find_or(gui, "sliderPosGrid", 0); - mw_themes = toml::find_or(gui, "theme", 0); - m_window_size_W = toml::find_or(gui, "mw_width", 0); - m_window_size_H = toml::find_or(gui, "mw_height", 0); - settings_install_dir = toml::find_or(gui, "installDir", ""); - main_window_geometry_x = toml::find_or(gui, "geometry_x", 0); - main_window_geometry_y = toml::find_or(gui, "geometry_y", 0); - main_window_geometry_w = toml::find_or(gui, "geometry_w", 0); - main_window_geometry_h = toml::find_or(gui, "geometry_h", 0); - m_pkg_viewer = toml::find_or>(gui, "pkgDirs", {}); - m_elf_viewer = toml::find_or>(gui, "elfDirs", {}); - m_recent_files = toml::find_or>(gui, "recentFiles", {}); - m_table_mode = toml::find_or(gui, "gameTableMode", 0); - } + if (data.contains("GUI")) { + const toml::value& gui = data.at("GUI"); + + m_icon_size = toml::find_or(gui, "iconSize", 0); + m_icon_size_grid = toml::find_or(gui, "iconSizeGrid", 0); + m_slider_pos = toml::find_or(gui, "sliderPos", 0); + m_slider_pos_grid = toml::find_or(gui, "sliderPosGrid", 0); + mw_themes = toml::find_or(gui, "theme", 0); + m_window_size_W = toml::find_or(gui, "mw_width", 0); + m_window_size_H = toml::find_or(gui, "mw_height", 0); + settings_install_dir = toml::find_or(gui, "installDir", ""); + main_window_geometry_x = toml::find_or(gui, "geometry_x", 0); + main_window_geometry_y = toml::find_or(gui, "geometry_y", 0); + main_window_geometry_w = toml::find_or(gui, "geometry_w", 0); + main_window_geometry_h = toml::find_or(gui, "geometry_h", 0); + m_pkg_viewer = toml::find_or>(gui, "pkgDirs", {}); + m_elf_viewer = toml::find_or>(gui, "elfDirs", {}); + m_recent_files = toml::find_or>(gui, "recentFiles", {}); + m_table_mode = toml::find_or(gui, "gameTableMode", 0); + } + + if (data.contains("Settings")) { + const toml::value& settings = data.at("Settings"); + + m_language = toml::find_or(settings, "consoleLanguage", 1); } } void save(const std::filesystem::path& path) { - toml::basic_value data; + toml::value data; std::error_code error; if (std::filesystem::exists(path, error)) { try { - data = toml::parse(path); + data = toml::parse(path); } catch (const std::exception& ex) { fmt::print("Exception trying to parse config file. Exception: {}\n", ex.what()); return; @@ -311,15 +410,20 @@ void save(const std::filesystem::path& path) { data["General"]["Fullscreen"] = isFullscreen; data["General"]["logFilter"] = logFilter; data["General"]["logType"] = logType; + data["General"]["userName"] = userName; data["General"]["showSplash"] = isShowSplash; - data["GPU"]["gpuId"] = gpuId; data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpPM4"] = shouldDumpPM4; + data["GPU"]["vblankDivider"] = vblankDivider; + data["Vulkan"]["gpuId"] = gpuId; data["Vulkan"]["validation"] = vkValidation; data["Vulkan"]["validation_sync"] = vkValidationSync; + data["Vulkan"]["validation_gpu"] = vkValidationGpu; + data["Vulkan"]["rdocEnable"] = rdocEnable; + data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; data["GUI"]["theme"] = mw_themes; @@ -339,8 +443,31 @@ void save(const std::filesystem::path& path) { data["GUI"]["elfDirs"] = m_elf_viewer; data["GUI"]["recentFiles"] = m_recent_files; + data["Settings"]["consoleLanguage"] = m_language; + std::ofstream file(path, std::ios::out); file << data; file.close(); } + +void setDefaultValues() { + isNeo = false; + isFullscreen = false; + screenWidth = 1280; + screenHeight = 720; + logFilter = ""; + logType = "async"; + userName = "shadPS4"; + isDebugDump = false; + isShowSplash = false; + isNullGpu = false; + shouldDumpShaders = false; + shouldDumpPM4 = false; + vblankDivider = 1; + vkValidation = false; + rdocEnable = false; + m_language = 1; + gpuId = -1; +} + } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 0a3b4905e..3006f2e2a 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -15,6 +15,7 @@ bool isNeoMode(); bool isFullscreenMode(); std::string getLogFilter(); std::string getLogType(); +std::string getUserName(); u32 getScreenWidth(); u32 getScreenHeight(); @@ -26,9 +27,34 @@ bool showSplash(); bool nullGpu(); bool dumpShaders(); bool dumpPM4(); +bool isRdocEnabled(); +bool isMarkersEnabled(); +u32 vblankDiv(); + +void setDebugDump(bool enable); +void setShowSplash(bool enable); +void setNullGpu(bool enable); +void setDumpShaders(bool enable); +void setDumpPM4(bool enable); +void setVblankDiv(u32 value); +void setGpuId(s32 selectedGpuId); +void setScreenWidth(u32 width); +void setScreenHeight(u32 height); +void setFullscreenMode(bool enable); +void setLanguage(u32 language); +void setNeoMode(bool enable); +void setUserName(std::string type); + +void setLogType(std::string type); +void setLogFilter(std::string type); + +void setVkValidation(bool enable); +void setVkSyncValidation(bool enable); +void setRdocEnabled(bool enable); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); +bool vkValidationGpuEnabled(); // Gui void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h); @@ -62,4 +88,8 @@ std::vector getPkgViewer(); std::vector getElfViewer(); std::vector getRecentFiles(); +void setDefaultValues(); + +// settings +u32 GetLanguage(); }; // namespace Config diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 0b03c86b8..a21af8bba 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -207,8 +207,8 @@ public: message_queue.EmplaceWait(entry); } else { ForEachBackend([&entry](auto& backend) { backend.Write(entry); }); + std::fflush(stdout); } - std::fflush(stdout); } private: diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 32576abe1..2c4a20dea 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -106,6 +106,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, DiscMap) \ SUB(Lib, Png) \ SUB(Lib, PlayGo) \ + SUB(Lib, Random) \ SUB(Lib, Usbd) \ SUB(Lib, Ajm) \ SUB(Lib, ErrorDialog) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 33e52fd4c..dccb838a5 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -73,6 +73,7 @@ enum class Class : u8 { Lib_DiscMap, ///< The LibSceDiscMap implementation. Lib_Png, ///< The LibScePng implementation. Lib_PlayGo, ///< The LibScePlayGo implementation. + Lib_Random, ///< The libSceRandom implementation. Lib_Usbd, ///< The LibSceUsbd implementation. Lib_Ajm, ///< The LibSceAjm implementation. Lib_ErrorDialog, ///< The LibSceErrorDialog implementation. diff --git a/src/shader_recompiler/object_pool.h b/src/common/object_pool.h similarity index 98% rename from src/shader_recompiler/object_pool.h rename to src/common/object_pool.h index 1398898ad..9e25e0c4c 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/common/object_pool.h @@ -8,7 +8,7 @@ #include #include -namespace Shader { +namespace Common { template requires std::is_destructible_v @@ -104,4 +104,4 @@ private: size_t new_chunk_size{}; }; -} // namespace Shader +} // namespace Common diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index 429fe2a5c..c1e8a5c0a 100644 --- a/src/common/path_util.cpp +++ b/src/common/path_util.cpp @@ -72,6 +72,8 @@ static auto UserPaths = [] { create_path(PathType::GameDataDir, user_dir / GAMEDATA_DIR); create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR); create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR); + create_path(PathType::DownloadDir, user_dir / DOWNLOAD_DIR); + create_path(PathType::CapturesDir, user_dir / CAPTURES_DIR); return paths; }(); diff --git a/src/common/path_util.h b/src/common/path_util.h index 57a9a73fa..263edd46e 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -18,6 +18,8 @@ enum class PathType { TempDataDir, // Where game temp data is stored. GameDataDir, // Where game data is stored. SysModuleDir, // Where system modules are stored. + DownloadDir, // Where downloads/temp files are stored. + CapturesDir, // Where rdoc captures are stored. }; constexpr auto PORTABLE_DIR = "user"; @@ -31,6 +33,8 @@ constexpr auto SAVEDATA_DIR = "savedata"; constexpr auto GAMEDATA_DIR = "data"; constexpr auto TEMPDATA_DIR = "temp"; constexpr auto SYSMODULES_DIR = "sys_modules"; +constexpr auto DOWNLOAD_DIR = "download"; +constexpr auto CAPTURES_DIR = "captures"; // Filenames constexpr auto LOG_FILE = "shad_log.txt"; diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in new file mode 100644 index 000000000..7f6fba9ed --- /dev/null +++ b/src/common/scm_rev.cpp.in @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/scm_rev.h" + +#define GIT_REV "@GIT_REV@" +#define GIT_BRANCH "@GIT_BRANCH@" +#define GIT_DESC "@GIT_DESC@" + +namespace Common { + +const char g_scm_rev[] = GIT_REV; +const char g_scm_branch[] = GIT_BRANCH; +const char g_scm_desc[] = GIT_DESC; + +} // namespace + diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h new file mode 100644 index 000000000..877a01272 --- /dev/null +++ b/src/common/scm_rev.h @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +namespace Common { + +extern const char g_scm_rev[]; +extern const char g_scm_branch[]; +extern const char g_scm_desc[]; + +} // namespace Common diff --git a/src/common/unique_function.h b/src/common/unique_function.h new file mode 100755 index 000000000..1891ec3c6 --- /dev/null +++ b/src/common/unique_function.h @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +/// General purpose function wrapper similar to std::function. +/// Unlike std::function, the captured values don't have to be copyable. +/// This class can be moved but not copied. +template +class UniqueFunction { + class CallableBase { + public: + virtual ~CallableBase() = default; + virtual ResultType operator()(Args&&...) = 0; + }; + + template + class Callable final : public CallableBase { + public: + Callable(Functor&& functor_) : functor{std::move(functor_)} {} + ~Callable() override = default; + + ResultType operator()(Args&&... args) override { + return functor(std::forward(args)...); + } + + private: + Functor functor; + }; + +public: + UniqueFunction() = default; + + template + UniqueFunction(Functor&& functor) + : callable{std::make_unique>(std::move(functor))} {} + + UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default; + UniqueFunction(UniqueFunction&& rhs) noexcept = default; + + UniqueFunction& operator=(const UniqueFunction&) = delete; + UniqueFunction(const UniqueFunction&) = delete; + + ResultType operator()(Args&&... args) const { + return (*callable)(std::forward(args)...); + } + + explicit operator bool() const noexcept { + return static_cast(callable); + } + +private: + std::unique_ptr callable; +}; + +} // namespace Common diff --git a/src/common/version.h b/src/common/version.h index 8492c7d97..80de187b0 100644 --- a/src/common/version.h +++ b/src/common/version.h @@ -8,6 +8,7 @@ namespace Common { -constexpr char VERSION[] = "0.1.1 WIP"; +constexpr char VERSION[] = "0.2.1 WIP"; +constexpr bool isRelease = false; } // namespace Common diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 1bc803a1f..91c8b7a1f 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -59,11 +59,11 @@ struct AddressSpace::Impl { static constexpr size_t MaxReductions = 10; size_t reduction = 0; + size_t virtual_size = SystemManagedSize + SystemReservedSize + UserSize; for (u32 i = 0; i < MaxReductions; i++) { - req.LowestStartingAddress = reinterpret_cast(SYSTEM_MANAGED_MIN + reduction); - virtual_base = static_cast(VirtualAlloc2( - process, NULL, SystemManagedSize + SystemReservedSize + UserSize - reduction, - MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, ¶m, 1)); + virtual_base = static_cast(VirtualAlloc2(process, NULL, virtual_size - reduction, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, ¶m, 1)); if (virtual_base) { break; } @@ -92,9 +92,7 @@ struct AddressSpace::Impl { const uintptr_t system_managed_addr = reinterpret_cast(system_managed_base); const uintptr_t system_reserved_addr = reinterpret_cast(system_reserved_base); const uintptr_t user_addr = reinterpret_cast(user_base); - placeholders.insert({system_managed_addr, system_managed_addr + system_managed_size}); - placeholders.insert({system_reserved_addr, system_reserved_addr + system_reserved_size}); - placeholders.insert({user_addr, user_addr + user_size}); + placeholders.insert({system_managed_addr, virtual_size - reduction}); // Allocate backing file that represents the total physical memory. backing_handle = @@ -456,8 +454,28 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 #endif } -void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) { - return impl->Unmap(virtual_addr, size, has_backing); +void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing) { +#ifdef _WIN32 + // There does not appear to be comparable support for partial unmapping on Windows. + // Unfortunately, a least one title was found to require this. The workaround is to unmap + // the entire allocation and remap the portions outside of the requested unmapping range. + impl->Unmap(virtual_addr, size, has_backing); + + // TODO: Determine if any titles require partial unmapping support for flexible allocations. + ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size), + "Partial unmapping of flexible allocations is not supported"); + + if (start_in_vma != 0) { + Map(virtual_addr, start_in_vma, 0, phys_base, is_exec); + } + + if (end_in_vma != size) { + Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec); + } +#else + impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing); +#endif } void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) { diff --git a/src/core/address_space.h b/src/core/address_space.h index c181b3625..53041bccb 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -22,7 +22,7 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; -constexpr VAddr SYSTEM_RESERVED_MIN = 0x800000000ULL; +constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; #ifdef __APPLE__ // Can only comfortably reserve the first 0x7C0000000 of system reserved space. constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL; @@ -34,10 +34,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL; static constexpr size_t SystemManagedSize = SYSTEM_MANAGED_MAX - SYSTEM_MANAGED_MIN + 1; static constexpr size_t SystemReservedSize = SYSTEM_RESERVED_MAX - SYSTEM_RESERVED_MIN + 1; -// User area size is normally larger than this. However games are unlikely to map to high -// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th). -// to save space on page tables. -static constexpr size_t UserSize = 1ULL << 39; +static constexpr size_t UserSize = 1ULL << 40; /** * Represents the user virtual address space backed by a dmem memory block @@ -94,7 +91,8 @@ public: void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd); /// Unmaps specified virtual memory area. - void Unmap(VAddr virtual_addr, size_t size, bool has_backing); + void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing); void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms); diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 2a9cf5e29..42318822b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -285,20 +285,24 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto slot = GetTcbKey(); #if defined(_WIN32) - // The following logic is based on the wine implementation of TlsGetValue - // https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719 + // The following logic is based on the Kernel32.dll asm of TlsGetValue static constexpr u32 TlsSlotsOffset = 0x1480; static constexpr u32 TlsExpansionSlotsOffset = 0x1780; static constexpr u32 TlsMinimumAvailable = 64; - const u32 teb_offset = slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset; - const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable; - // Load the pointer to the table of TLS slots. c.putSeg(gs); - c.mov(dst, ptr[reinterpret_cast(teb_offset)]); - // Load the pointer to our buffer. - c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]); + if (slot < TlsMinimumAvailable) { + // Load the pointer to TLS slots. + c.mov(dst, ptr[reinterpret_cast(TlsSlotsOffset + slot * sizeof(LPVOID))]); + } else { + const u32 tls_index = slot - TlsMinimumAvailable; + + // Load the pointer to the table of TLS expansion slots. + c.mov(dst, ptr[reinterpret_cast(TlsExpansionSlotsOffset)]); + // Load the pointer to our buffer. + c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]); + } #elif defined(__APPLE__) // The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by // pthread_getspecific https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96 diff --git a/src/core/file_format/pkg.cpp b/src/core/file_format/pkg.cpp index 6d5fb0d4a..336d81019 100644 --- a/src/core/file_format/pkg.cpp +++ b/src/core/file_format/pkg.cpp @@ -350,7 +350,7 @@ bool PKG::Extract(const std::filesystem::path& filepath, const std::filesystem:: return true; } -void PKG::ExtractFiles(const int& index) { +void PKG::ExtractFiles(const int index) { int inode_number = fsTable[index].inode; int inode_type = fsTable[index].type; std::string inode_name = fsTable[index].name; diff --git a/src/core/file_format/pkg.h b/src/core/file_format/pkg.h index 3fef6c1c4..b6b09a191 100644 --- a/src/core/file_format/pkg.h +++ b/src/core/file_format/pkg.h @@ -104,7 +104,7 @@ public: ~PKG(); bool Open(const std::filesystem::path& filepath); - void ExtractFiles(const int& index); + void ExtractFiles(const int index); bool Extract(const std::filesystem::path& filepath, const std::filesystem::path& extract, std::string& failreason); diff --git a/src/core/file_format/playgo_chunk.cpp b/src/core/file_format/playgo_chunk.cpp new file mode 100644 index 000000000..43d8a4ded --- /dev/null +++ b/src/core/file_format/playgo_chunk.cpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/io_file.h" + +#include "playgo_chunk.h" + +bool PlaygoChunk::Open(const std::filesystem::path& filepath) { + Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read); + if (!file.IsOpen()) { + return false; + } + file.Read(playgoHeader); + + return true; +} \ No newline at end of file diff --git a/src/core/file_format/playgo_chunk.h b/src/core/file_format/playgo_chunk.h new file mode 100644 index 000000000..d17d24bf9 --- /dev/null +++ b/src/core/file_format/playgo_chunk.h @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include +#include "common/types.h" + +struct PlaygoHeader { + u32 magic; + + u16 version_major; + u16 version_minor; + u16 image_count; + u16 chunk_count; + u16 mchunk_count; + u16 scenario_count; + // TODO fill the rest +}; +class PlaygoChunk { +public: + PlaygoChunk() = default; + ~PlaygoChunk() = default; + + bool Open(const std::filesystem::path& filepath); + PlaygoHeader GetPlaygoHeader() { + return playgoHeader; + } + +private: + PlaygoHeader playgoHeader; +}; \ No newline at end of file diff --git a/src/core/file_format/trp.cpp b/src/core/file_format/trp.cpp index b4d4c95e2..f122709e4 100644 --- a/src/core/file_format/trp.cpp +++ b/src/core/file_format/trp.cpp @@ -6,7 +6,7 @@ TRP::TRP() = default; TRP::~TRP() = default; -void TRP::GetNPcommID(std::filesystem::path trophyPath, int index) { +void TRP::GetNPcommID(const std::filesystem::path& trophyPath, int index) { std::filesystem::path trpPath = trophyPath / "sce_sys/npbind.dat"; Common::FS::IOFile npbindFile(trpPath, Common::FS::FileAccessMode::Read); if (!npbindFile.IsOpen()) { @@ -27,7 +27,7 @@ static void removePadding(std::vector& vec) { } } -bool TRP::Extract(std::filesystem::path trophyPath) { +bool TRP::Extract(const std::filesystem::path& trophyPath) { std::string title = trophyPath.filename().string(); std::filesystem::path gameSysDir = trophyPath / "sce_sys/trophy/"; if (!std::filesystem::exists(gameSysDir)) { diff --git a/src/core/file_format/trp.h b/src/core/file_format/trp.h index 6d1f13bd9..56f490026 100644 --- a/src/core/file_format/trp.h +++ b/src/core/file_format/trp.h @@ -33,8 +33,8 @@ class TRP { public: TRP(); ~TRP(); - bool Extract(std::filesystem::path trophyPath); - void GetNPcommID(std::filesystem::path trophyPath, int index); + bool Extract(const std::filesystem::path& trophyPath); + void GetNPcommID(const std::filesystem::path& trophyPath, int index); private: Crypto crypto; diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 2f57c9f34..40d8212bb 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -25,24 +25,28 @@ void MntPoints::UnmountAll() { m_mnt_pairs.clear(); } -std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) { - const MntPair* mount = GetMount(guest_directory); +std::filesystem::path MntPoints::GetHostPath(std::string_view guest_directory) { + // Evil games like Turok2 pass double slashes e.g /app0//game.kpf + std::string corrected_path(guest_directory); + size_t pos = corrected_path.find("//"); + while (pos != std::string::npos) { + corrected_path.replace(pos, 2, "/"); + pos = corrected_path.find("//", pos + 1); + } + + const MntPair* mount = GetMount(corrected_path); if (!mount) { - return guest_directory; + return ""; } // Nothing to do if getting the mount itself. - if (guest_directory == mount->mount) { + if (corrected_path == mount->mount) { return mount->host_path; } // Remove device (e.g /app0) from path to retrieve relative path. - u32 pos = mount->mount.size() + 1; - // Evil games like Turok2 pass double slashes e.g /app0//game.kpf - if (guest_directory[pos] == '/') { - pos++; - } - const auto rel_path = std::string_view(guest_directory).substr(pos); + pos = mount->mount.size() + 1; + const auto rel_path = std::string_view(corrected_path).substr(pos); const auto host_path = mount->host_path / rel_path; if (!NeedsCaseInsensiveSearch) { return host_path; @@ -50,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) // If the path does not exist attempt to verify this. // Retrieve parent path until we find one that exists. + std::scoped_lock lk{m_mutex}; path_parts.clear(); auto current_path = host_path; while (!std::filesystem::exists(current_path)) { @@ -66,7 +71,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) // exist in filesystem but in different case. auto guest_path = current_path; while (!path_parts.empty()) { - const auto& part = path_parts.back(); + const auto part = path_parts.back(); const auto add_match = [&](const auto& host_part) { current_path /= host_part; guest_path /= part; diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index d636f8bff..b0fb63242 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -31,7 +31,7 @@ public: void Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder); void UnmountAll(); - std::filesystem::path GetHostPath(const std::string& guest_directory); + std::filesystem::path GetHostPath(std::string_view guest_directory); const MntPair* GetMount(const std::string& guest_path) { const auto it = std::ranges::find_if( diff --git a/src/core/libraries/app_content/app_content.cpp b/src/core/libraries/app_content/app_content.cpp index 7e9cf7a21..882f99e49 100644 --- a/src/core/libraries/app_content/app_content.cpp +++ b/src/core/libraries/app_content/app_content.cpp @@ -198,13 +198,9 @@ int PS4_SYSV_ABI sceAppContentTemporaryDataMount() { int PS4_SYSV_ABI sceAppContentTemporaryDataMount2(OrbisAppContentTemporaryDataOption option, OrbisAppContentMountPoint* mountPoint) { - if (std::string_view(mountPoint->data).empty()) // causing issues with save_data. + if (mountPoint == nullptr) return ORBIS_APP_CONTENT_ERROR_PARAMETER; - auto* param_sfo = Common::Singleton::Instance(); - std::string id(param_sfo->GetString("CONTENT_ID"), 7, 9); - const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id; - auto* mnt = Common::Singleton::Instance(); - mnt->Mount(mount_dir, mountPoint->data); + strncpy(mountPoint->data, "/temp0", 16); LOG_INFO(Lib_AppContent, "sceAppContentTemporaryDataMount2: option = {}, mountPoint = {}", option, mountPoint->data); return ORBIS_OK; diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index cc7ce342a..cb676afc1 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -175,7 +175,6 @@ int PS4_SYSV_ABI sceAudioOutGetLastOutputTime() { } int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* state) { - int type = 0; int channels_num = 0; @@ -235,8 +234,11 @@ int PS4_SYSV_ABI sceAudioOutGetSystemState() { } int PS4_SYSV_ABI sceAudioOutInit() { + LOG_TRACE(Lib_AudioOut, "called"); + if (audio != nullptr) { + return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; + } audio = std::make_unique(); - LOG_INFO(Lib_AudioOut, "called"); return ORBIS_OK; } diff --git a/src/core/libraries/avplayer/avplayer.cpp b/src/core/libraries/avplayer/avplayer.cpp index dd9f42b26..bd1f6b503 100644 --- a/src/core/libraries/avplayer/avplayer.cpp +++ b/src/core/libraries/avplayer/avplayer.cpp @@ -1,21 +1,34 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -// Generated By moduleGenerator #include "avplayer.h" + +#include "avplayer_impl.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" namespace Libraries::AvPlayer { -int PS4_SYSV_ABI sceAvPlayerAddSource() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +using namespace Kernel; + +s32 PS4_SYSV_ABI sceAvPlayerAddSource(SceAvPlayerHandle handle, const char* filename) { + LOG_TRACE(Lib_AvPlayer, "filename = {}", filename); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->AddSource(filename); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerAddSourceEx() { +s32 PS4_SYSV_ABI sceAvPlayerAddSourceEx(SceAvPlayerHandle handle, SceAvPlayerUriType uriType, + SceAvPlayerSourceDetails* sourceDetails) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } return ORBIS_OK; } @@ -24,122 +37,244 @@ int PS4_SYSV_ABI sceAvPlayerChangeStream() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerClose() { +s32 PS4_SYSV_ABI sceAvPlayerClose(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + delete handle; + return ORBIS_OK; +} + +u64 PS4_SYSV_ABI sceAvPlayerCurrentTime(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->CurrentTime(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerDisableStream(SceAvPlayerHandle handle, u32 stream_id) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerEnableStream(SceAvPlayerHandle handle, u32 stream_id) { + LOG_TRACE(Lib_AvPlayer, "stream_id = {}", stream_id); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->EnableStream(stream_id); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetAudioData(SceAvPlayerHandle handle, SceAvPlayerFrameInfo* p_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || p_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetAudioData(*p_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerGetStreamInfo(SceAvPlayerHandle handle, u32 stream_id, + SceAvPlayerStreamInfo* p_info) { + LOG_TRACE(Lib_AvPlayer, "stream_id = {}", stream_id); + if (handle == nullptr || p_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetStreamInfo(stream_id, *p_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetVideoData(SceAvPlayerHandle handle, + SceAvPlayerFrameInfo* video_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || video_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetVideoData(*video_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle, + SceAvPlayerFrameInfoEx* video_info) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || video_info == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetVideoData(*video_info); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (data == nullptr) { + return nullptr; + } + + if (data->memory_replacement.allocate == nullptr || + data->memory_replacement.allocate_texture == nullptr || + data->memory_replacement.deallocate == nullptr || + data->memory_replacement.deallocate_texture == nullptr) { + LOG_ERROR(Lib_AvPlayer, "All allocators are required for AVPlayer Initialisation."); + return nullptr; + } + + return new AvPlayer(*data); +} + +s32 PS4_SYSV_ABI sceAvPlayerInitEx(const SceAvPlayerInitDataEx* p_data, + SceAvPlayerHandle* p_player) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (p_data == nullptr || p_player == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + + if (p_data->memory_replacement.allocate == nullptr || + p_data->memory_replacement.allocate_texture == nullptr || + p_data->memory_replacement.deallocate == nullptr || + p_data->memory_replacement.deallocate_texture == nullptr) { + LOG_ERROR(Lib_AvPlayer, "All allocators are required for AVPlayer Initialisation."); + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + + SceAvPlayerInitData data = {}; + data.memory_replacement = p_data->memory_replacement; + data.file_replacement = p_data->file_replacement; + data.event_replacement = p_data->event_replacement; + data.default_language = p_data->default_language; + data.num_output_video_framebuffers = p_data->num_output_video_framebuffers; + data.auto_start = p_data->auto_start; + + *p_player = new AvPlayer(data); + return ORBIS_OK; +} + +bool PS4_SYSV_ABI sceAvPlayerIsActive(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + LOG_TRACE(Lib_AvPlayer, "returning ORBIS_AVPLAYER_ERROR_INVALID_PARAMS"); + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->IsActive(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerJumpToTime(SceAvPlayerHandle handle, uint64_t time) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called, time (msec) = {}", time); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerPause(SceAvPlayerHandle handle) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerPostInit(SceAvPlayerHandle handle, SceAvPlayerPostInitData* data) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr || data == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->PostInit(*data); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; +} + +s32 PS4_SYSV_ABI sceAvPlayerPrintf(const char* format, ...) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerCurrentTime() { +s32 PS4_SYSV_ABI sceAvPlayerResume(SceAvPlayerHandle handle) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerSetAvSyncMode(SceAvPlayerHandle handle, + SceAvPlayerAvSyncMode sync_mode) { + LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceAvPlayerSetLogCallback(SceAvPlayerLogCallback log_cb, void* user_data) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerDisableStream() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceAvPlayerSetLooping(SceAvPlayerHandle handle, bool loop_flag) { + LOG_TRACE(Lib_AvPlayer, "called, looping = {}", loop_flag); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + if (!handle->SetLooping(loop_flag)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerEnableStream() { +s32 PS4_SYSV_ABI sceAvPlayerSetTrickSpeed(SceAvPlayerHandle handle, s32 trick_speed) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } return ORBIS_OK; } -int PS4_SYSV_ABI sceAvPlayerGetAudioData() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStart(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->Start(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetStreamInfo() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStop(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->Stop(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetVideoData() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceAvPlayerStreamCount(SceAvPlayerHandle handle) { + LOG_TRACE(Lib_AvPlayer, "called"); + if (handle == nullptr) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + const auto res = handle->GetStreamCount(); + LOG_TRACE(Lib_AvPlayer, "returning {}", res); + return res; } -int PS4_SYSV_ABI sceAvPlayerGetVideoDataEx() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerInit() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerInitEx() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerIsActive() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerJumpToTime() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPause() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPostInit() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerPrintf() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerResume() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetAvSyncMode() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetLogCallback() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetLooping() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerSetTrickSpeed() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStart() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStop() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerStreamCount() { - LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); - return ORBIS_OK; -} - -int PS4_SYSV_ABI sceAvPlayerVprintf() { +s32 PS4_SYSV_ABI sceAvPlayerVprintf(const char* format, va_list args) { LOG_ERROR(Lib_AvPlayer, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/avplayer/avplayer.h b/src/core/libraries/avplayer/avplayer.h index 39a619ee1..360f06b65 100644 --- a/src/core/libraries/avplayer/avplayer.h +++ b/src/core/libraries/avplayer/avplayer.h @@ -5,39 +5,288 @@ #include "common/types.h" +#include // va_list +#include // size_t + namespace Core::Loader { class SymbolsResolver; } namespace Libraries::AvPlayer { -int PS4_SYSV_ABI sceAvPlayerAddSource(); -int PS4_SYSV_ABI sceAvPlayerAddSourceEx(); -int PS4_SYSV_ABI sceAvPlayerChangeStream(); -int PS4_SYSV_ABI sceAvPlayerClose(); -int PS4_SYSV_ABI sceAvPlayerCurrentTime(); -int PS4_SYSV_ABI sceAvPlayerDisableStream(); -int PS4_SYSV_ABI sceAvPlayerEnableStream(); -int PS4_SYSV_ABI sceAvPlayerGetAudioData(); -int PS4_SYSV_ABI sceAvPlayerGetStreamInfo(); -int PS4_SYSV_ABI sceAvPlayerGetVideoData(); -int PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(); -int PS4_SYSV_ABI sceAvPlayerInit(); -int PS4_SYSV_ABI sceAvPlayerInitEx(); -int PS4_SYSV_ABI sceAvPlayerIsActive(); -int PS4_SYSV_ABI sceAvPlayerJumpToTime(); -int PS4_SYSV_ABI sceAvPlayerPause(); -int PS4_SYSV_ABI sceAvPlayerPostInit(); -int PS4_SYSV_ABI sceAvPlayerPrintf(); -int PS4_SYSV_ABI sceAvPlayerResume(); -int PS4_SYSV_ABI sceAvPlayerSetAvSyncMode(); -int PS4_SYSV_ABI sceAvPlayerSetLogCallback(); -int PS4_SYSV_ABI sceAvPlayerSetLooping(); -int PS4_SYSV_ABI sceAvPlayerSetTrickSpeed(); -int PS4_SYSV_ABI sceAvPlayerStart(); -int PS4_SYSV_ABI sceAvPlayerStop(); -int PS4_SYSV_ABI sceAvPlayerStreamCount(); -int PS4_SYSV_ABI sceAvPlayerVprintf(); +class AvPlayer; + +using SceAvPlayerHandle = AvPlayer*; + +enum SceAvPlayerUriType { SCE_AVPLAYER_URI_TYPE_SOURCE = 0 }; + +struct SceAvPlayerUri { + const char* name; + u32 length; +}; + +enum SceAvPlayerSourceType { + SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN = 0, + SCE_AVPLAYER_SOURCE_TYPE_FILE_MP4 = 1, + SCE_AVPLAYER_SOURCE_TYPE_HLS = 8 +}; + +struct SceAvPlayerSourceDetails { + SceAvPlayerUri uri; + u8 reserved1[64]; + SceAvPlayerSourceType source_type; + u8 reserved2[44]; +}; + +struct SceAvPlayerAudio { + u16 channel_count; + u8 reserved1[2]; + u32 sample_rate; + u32 size; + u8 language_code[4]; +}; + +struct SceAvPlayerVideo { + u32 width; + u32 height; + f32 aspect_ratio; + u8 language_code[4]; +}; + +struct SceAvPlayerTextPosition { + u16 top; + u16 left; + u16 bottom; + u16 right; +}; + +struct SceAvPlayerTimedText { + u8 language_code[4]; + u16 text_size; + u16 font_size; + SceAvPlayerTextPosition position; +}; + +union SceAvPlayerStreamDetails { + u8 reserved[16]; + SceAvPlayerAudio audio; + SceAvPlayerVideo video; + SceAvPlayerTimedText subs; +}; + +struct SceAvPlayerFrameInfo { + u8* pData; + u8 reserved[4]; + u64 timestamp; + SceAvPlayerStreamDetails details; +}; + +struct SceAvPlayerStreamInfo { + u32 type; + u8 reserved[4]; + SceAvPlayerStreamDetails details; + u64 duration; + u64 start_time; +}; + +struct SceAvPlayerAudioEx { + u16 channel_count; + u8 reserved[2]; + u32 sample_rate; + u32 size; + u8 language_code[4]; + u8 reserved1[64]; +}; + +struct SceAvPlayerVideoEx { + u32 width; + u32 height; + f32 aspect_ratio; + u8 language_code[4]; + u32 framerate; + u32 crop_left_offset; + u32 crop_right_offset; + u32 crop_top_offset; + u32 crop_bottom_offset; + u32 pitch; + u8 luma_bit_depth; + u8 chroma_bit_depth; + bool video_full_range_flag; + u8 reserved1[37]; +}; + +struct SceAvPlayerTimedTextEx { + u8 language_code[4]; + u8 reserved[12]; + u8 reserved1[64]; +}; + +union SceAvPlayerStreamDetailsEx { + SceAvPlayerAudioEx audio; + SceAvPlayerVideoEx video; + SceAvPlayerTimedTextEx subs; + u8 reserved1[80]; +}; + +struct SceAvPlayerFrameInfoEx { + void* pData; + u8 reserved[4]; + u64 timestamp; + SceAvPlayerStreamDetailsEx details; +}; + +typedef void* PS4_SYSV_ABI (*SceAvPlayerAllocate)(void* p, u32 align, u32 size); +typedef void PS4_SYSV_ABI (*SceAvPlayerDeallocate)(void* p, void* mem); +typedef void* PS4_SYSV_ABI (*SceAvPlayerAllocateTexture)(void* p, u32 align, u32 size); +typedef void PS4_SYSV_ABI (*SceAvPlayerDeallocateTexture)(void* p, void* mem); + +struct SceAvPlayerMemAllocator { + void* object_ptr; + SceAvPlayerAllocate allocate; + SceAvPlayerDeallocate deallocate; + SceAvPlayerAllocateTexture allocate_texture; + SceAvPlayerDeallocateTexture deallocate_texture; +}; + +typedef s32 PS4_SYSV_ABI (*SceAvPlayerOpenFile)(void* p, const char* name); +typedef s32 PS4_SYSV_ABI (*SceAvPlayerCloseFile)(void* p); +typedef s32 PS4_SYSV_ABI (*SceAvPlayerReadOffsetFile)(void* p, u8* buf, u64 pos, u32 len); +typedef u64 PS4_SYSV_ABI (*SceAvPlayerSizeFile)(void* p); + +struct SceAvPlayerFileReplacement { + void* object_ptr; + SceAvPlayerOpenFile open; + SceAvPlayerCloseFile close; + SceAvPlayerReadOffsetFile readOffset; + SceAvPlayerSizeFile size; +}; + +typedef void PS4_SYSV_ABI (*SceAvPlayerEventCallback)(void* p, s32 event, s32 src_id, void* data); + +struct SceAvPlayerEventReplacement { + void* object_ptr; + SceAvPlayerEventCallback event_callback; +}; + +enum SceAvPlayerDebuglevels { + SCE_AVPLAYER_DBG_NONE, + SCE_AVPLAYER_DBG_INFO, + SCE_AVPLAYER_DBG_WARNINGS, + SCE_AVPLAYER_DBG_ALL +}; + +struct SceAvPlayerInitData { + SceAvPlayerMemAllocator memory_replacement; + SceAvPlayerFileReplacement file_replacement; + SceAvPlayerEventReplacement event_replacement; + SceAvPlayerDebuglevels debug_level; + u32 base_priority; + s32 num_output_video_framebuffers; + bool auto_start; + u8 reserved[3]; + const char* default_language; +}; + +struct SceAvPlayerInitDataEx { + size_t this_size; + SceAvPlayerMemAllocator memory_replacement; + SceAvPlayerFileReplacement file_replacement; + SceAvPlayerEventReplacement event_replacement; + const char* default_language; + SceAvPlayerDebuglevels debug_level; + u32 audio_decoder_priority; + u32 audio_decoder_affinity; + u32 video_decoder_priority; + u32 video_decoder_affinity; + u32 demuxer_priority; + u32 demuxer_affinity; + u32 controller_priority; + u32 controller_affinity; + u32 http_streaming_priority; + u32 http_streaming_affinity; + u32 file_streaming_priority; + u32 file_streaming_affinity; + s32 num_output_video_framebuffers; + bool auto_start; + u8 reserved[3]; +}; + +enum SceAvPlayerStreamType { + SCE_AVPLAYER_VIDEO, + SCE_AVPLAYER_AUDIO, + SCE_AVPLAYER_TIMEDTEXT, + SCE_AVPLAYER_UNKNOWN +}; + +enum SceAvPlayerVideoDecoderType { + SCE_AVPLAYER_VIDEO_DECODER_TYPE_DEFAULT = 0, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_RESERVED1, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_SOFTWARE, + SCE_AVPLAYER_VIDEO_DECODER_TYPE_SOFTWARE2 +}; + +enum SceAvPlayerAudioDecoderType { + SCE_AVPLAYER_AUDIO_DECODER_TYPE_DEFAULT = 0, + SCE_AVPLAYER_AUDIO_DECODER_TYPE_RESERVED1, + SCE_AVPLAYER_AUDIO_DECODER_TYPE_RESERVED2 +}; + +struct SceAvPlayerDecoderInit { + union { + SceAvPlayerVideoDecoderType video_type; + SceAvPlayerAudioDecoderType audio_type; + u8 reserved[4]; + } decoderType; + union { + struct { + s32 cpu_affinity_mask; + s32 cpu_thread_priority; + u8 decode_pipeline_depth; + u8 compute_pipe_id; + u8 compute_queue_id; + u8 enable_interlaced; + u8 reserved[16]; + } avcSw2; + struct { + u8 audio_channel_order; + u8 reserved[27]; + } aac; + u8 reserved[28]; + } decoderParams; +}; + +struct SceAvPlayerHTTPCtx { + u32 http_context_id; + u32 ssl_context_id; +}; + +struct SceAvPlayerPostInitData { + u32 demux_video_buffer_size; + SceAvPlayerDecoderInit video_decoder_init; + SceAvPlayerDecoderInit audio_decoder_init; + SceAvPlayerHTTPCtx http_context; + u8 reserved[56]; +}; + +enum SceAvPlayerAvSyncMode { + SCE_AVPLAYER_AV_SYNC_MODE_DEFAULT = 0, + SCE_AVPLAYER_AV_SYNC_MODE_NONE +}; + +typedef int PS4_SYSV_ABI (*SceAvPlayerLogCallback)(void* p, const char* format, va_list args); + +enum SceAvPlayerEvents { + SCE_AVPLAYER_STATE_STOP = 0x01, + SCE_AVPLAYER_STATE_READY = 0x02, + SCE_AVPLAYER_STATE_PLAY = 0x03, + SCE_AVPLAYER_STATE_PAUSE = 0x04, + SCE_AVPLAYER_STATE_BUFFERING = 0x05, + SCE_AVPLAYER_TIMED_TEXT_DELIVERY = 0x10, + SCE_AVPLAYER_WARNING_ID = 0x20, + SCE_AVPLAYER_ENCRYPTION = 0x30, + SCE_AVPLAYER_DRM_ERROR = 0x40 +}; void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym); -} // namespace Libraries::AvPlayer \ No newline at end of file + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_common.cpp b/src/core/libraries/avplayer/avplayer_common.cpp new file mode 100644 index 000000000..306603e29 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_common.cpp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer.h" +#include "avplayer_common.h" + +#include // std::equal +#include // std::tolower +#include // std::string_view + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +static bool ichar_equals(char a, char b) { + return std::tolower(static_cast(a)) == + std::tolower(static_cast(b)); +} + +static bool iequals(std::string_view l, std::string_view r) { + return std::ranges::equal(l, r, ichar_equals); +} + +SceAvPlayerSourceType GetSourceType(std::string_view path) { + if (path.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + + std::string_view name = path; + if (path.find("://") != std::string_view::npos) { + // This path is a URI. Strip HTTP parameters from it. + // schema://server.domain/path/file.ext/and/beyond?param=value#paragraph -> + // -> schema://server.domain/path/to/file.ext/and/beyond + name = path.substr(0, path.find_first_of("?#")); + if (name.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + } + + // schema://server.domain/path/to/file.ext/and/beyond -> .ext/and/beyond + auto ext = name.substr(name.rfind('.')); + if (ext.empty()) { + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; + } + + // .ext/and/beyond -> .ext + ext = ext.substr(0, ext.find('/')); + + if (iequals(ext, ".mp4") || iequals(ext, ".m4v") || iequals(ext, ".m3d") || + iequals(ext, ".m4a") || iequals(ext, ".mov")) { + return SCE_AVPLAYER_SOURCE_TYPE_FILE_MP4; + } + + if (iequals(ext, ".m3u8")) { + return SCE_AVPLAYER_SOURCE_TYPE_HLS; + } + + return SCE_AVPLAYER_SOURCE_TYPE_UNKNOWN; +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_common.h b/src/core/libraries/avplayer/avplayer_common.h new file mode 100644 index 000000000..a53696ecf --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_common.h @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include + +#define AVPLAYER_IS_ERROR(x) ((x) < 0) + +namespace Libraries::AvPlayer { + +enum class AvState { + Initial, + AddingSource, + Ready, + Play, + Stop, + EndOfFile, + Pause, + C0x08, + Jump, + TrickMode, + C0x0B, + Buffering, + Starting, + Error, +}; + +enum class AvEventType { + ChangeFlowState = 21, + WarningId = 22, + RevertState = 30, + AddSource = 40, + Error = 255, +}; + +union AvPlayerEventData { + u32 num_frames; // 20 + AvState state; // AvEventType::ChangeFlowState + s32 error; // AvEventType::WarningId + u32 attempt; // AvEventType::AddSource +}; + +struct AvPlayerEvent { + AvEventType event; + AvPlayerEventData payload; +}; + +template +class AvPlayerQueue { +public: + size_t Size() { + return m_queue.size(); + } + + void Push(T&& value) { + std::lock_guard guard(m_mutex); + m_queue.emplace(std::forward(value)); + } + + std::optional Pop() { + if (Size() == 0) { + return std::nullopt; + } + std::lock_guard guard(m_mutex); + auto result = std::move(m_queue.front()); + m_queue.pop(); + return result; + } + + void Clear() { + std::lock_guard guard(m_mutex); + m_queue = {}; + } + +private: + std::mutex m_mutex{}; + std::queue m_queue{}; +}; + +SceAvPlayerSourceType GetSourceType(std::string_view path); + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_data_streamer.h b/src/core/libraries/avplayer/avplayer_data_streamer.h new file mode 100644 index 000000000..04097bb4d --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_data_streamer.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" + +#include "common/types.h" + +struct AVIOContext; + +namespace Libraries::AvPlayer { + +class IDataStreamer { +public: + virtual ~IDataStreamer() = default; + virtual AVIOContext* GetContext() = 0; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_file_streamer.cpp b/src/core/libraries/avplayer/avplayer_file_streamer.cpp new file mode 100644 index 000000000..dc1386a47 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_file_streamer.cpp @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_file_streamer.h" + +#include "avplayer_common.h" + +#include + +extern "C" { +#include +#include +} + +#include // std::max, std::min + +#define AVPLAYER_AVIO_BUFFER_SIZE 4096 + +namespace Libraries::AvPlayer { + +AvPlayerFileStreamer::AvPlayerFileStreamer(const SceAvPlayerFileReplacement& file_replacement, + std::string_view path) + : m_file_replacement(file_replacement) { + const auto ptr = m_file_replacement.object_ptr; + m_fd = m_file_replacement.open(ptr, path.data()); + ASSERT(m_fd >= 0); + m_file_size = m_file_replacement.size(ptr); + // avio_buffer is deallocated in `avio_context_free` + const auto avio_buffer = reinterpret_cast(av_malloc(AVPLAYER_AVIO_BUFFER_SIZE)); + m_avio_context = + avio_alloc_context(avio_buffer, AVPLAYER_AVIO_BUFFER_SIZE, 0, this, + &AvPlayerFileStreamer::ReadPacket, nullptr, &AvPlayerFileStreamer::Seek); +} + +AvPlayerFileStreamer::~AvPlayerFileStreamer() { + if (m_avio_context != nullptr) { + avio_context_free(&m_avio_context); + } + if (m_file_replacement.close != nullptr && m_fd >= 0) { + const auto close = m_file_replacement.close; + const auto ptr = m_file_replacement.object_ptr; + close(ptr); + } +} + +s32 AvPlayerFileStreamer::ReadPacket(void* opaque, u8* buffer, s32 size) { + const auto self = reinterpret_cast(opaque); + if (self->m_position >= self->m_file_size) { + return AVERROR_EOF; + } + if (self->m_position + size > self->m_file_size) { + size = self->m_file_size - self->m_position; + } + const auto read_offset = self->m_file_replacement.readOffset; + const auto ptr = self->m_file_replacement.object_ptr; + const auto bytes_read = read_offset(ptr, buffer, self->m_position, size); + if (bytes_read == 0 && size != 0) { + return AVERROR_EOF; + } + self->m_position += bytes_read; + return bytes_read; +} + +s64 AvPlayerFileStreamer::Seek(void* opaque, s64 offset, int whence) { + const auto self = reinterpret_cast(opaque); + if (whence & AVSEEK_SIZE) { + return self->m_file_size; + } + + if (whence == SEEK_CUR) { + self->m_position = + std::min(u64(std::max(s64(0), s64(self->m_position) + offset)), self->m_file_size); + return self->m_position; + } else if (whence == SEEK_SET) { + self->m_position = std::min(u64(std::max(s64(0), offset)), self->m_file_size); + return self->m_position; + } else if (whence == SEEK_END) { + self->m_position = + std::min(u64(std::max(s64(0), s64(self->m_file_size) + offset)), self->m_file_size); + return self->m_position; + } + + return -1; +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_file_streamer.h b/src/core/libraries/avplayer/avplayer_file_streamer.h new file mode 100644 index 000000000..658ce8c1e --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_file_streamer.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" + +#include +#include + +struct AVIOContext; + +namespace Libraries::AvPlayer { + +class AvPlayerFileStreamer : public IDataStreamer { +public: + AvPlayerFileStreamer(const SceAvPlayerFileReplacement& file_replacement, std::string_view path); + ~AvPlayerFileStreamer(); + + AVIOContext* GetContext() override { + return m_avio_context; + } + +private: + static s32 ReadPacket(void* opaque, u8* buffer, s32 size); + static s64 Seek(void* opaque, s64 buffer, int whence); + + SceAvPlayerFileReplacement m_file_replacement; + + int m_fd = -1; + u64 m_position{}; + u64 m_file_size{}; + AVIOContext* m_avio_context{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_impl.cpp b/src/core/libraries/avplayer/avplayer_impl.cpp new file mode 100644 index 000000000..cdfff8277 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_impl.cpp @@ -0,0 +1,200 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_common.h" +#include "avplayer_file_streamer.h" +#include "avplayer_impl.h" + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" + +using namespace Libraries::Kernel; + +namespace Libraries::AvPlayer { + +void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) { + const auto* const self = reinterpret_cast(handle); + const auto allocate = self->m_init_data_original.memory_replacement.allocate; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return allocate(ptr, alignment, size); +} + +void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) { + const auto* const self = reinterpret_cast(handle); + const auto deallocate = self->m_init_data_original.memory_replacement.deallocate; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return deallocate(ptr, memory); +} + +void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) { + const auto* const self = reinterpret_cast(handle); + const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return allocate(ptr, alignment, size); +} + +void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) { + const auto* const self = reinterpret_cast(handle); + const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture; + const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; + return deallocate(ptr, memory); +} + +int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto open = self->m_init_data_original.file_replacement.open; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return open(ptr, filename); +} + +int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto close = self->m_init_data_original.file_replacement.close; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return close(ptr); +} + +int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto read_offset = self->m_init_data_original.file_replacement.readOffset; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return read_offset(ptr, buffer, position, length); +} + +u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { + auto const self = reinterpret_cast(handle); + std::lock_guard guard(self->m_file_io_mutex); + + const auto size = self->m_init_data_original.file_replacement.size; + const auto ptr = self->m_init_data_original.file_replacement.object_ptr; + return size(ptr); +} + +SceAvPlayerInitData AvPlayer::StubInitData(const SceAvPlayerInitData& data) { + SceAvPlayerInitData result = data; + result.memory_replacement.object_ptr = this; + result.memory_replacement.allocate = &AvPlayer::Allocate; + result.memory_replacement.deallocate = &AvPlayer::Deallocate; + result.memory_replacement.allocate_texture = &AvPlayer::AllocateTexture; + result.memory_replacement.deallocate_texture = &AvPlayer::DeallocateTexture; + if (data.file_replacement.open == nullptr || data.file_replacement.close == nullptr || + data.file_replacement.readOffset == nullptr || data.file_replacement.size == nullptr) { + result.file_replacement = {}; + } else { + result.file_replacement.object_ptr = this; + result.file_replacement.open = &AvPlayer::OpenFile; + result.file_replacement.close = &AvPlayer::CloseFile; + result.file_replacement.readOffset = &AvPlayer::ReadOffsetFile; + result.file_replacement.size = &AvPlayer::SizeFile; + } + return result; +} + +AvPlayer::AvPlayer(const SceAvPlayerInitData& data) + : m_init_data(StubInitData(data)), m_init_data_original(data), + m_state(std::make_unique(m_init_data)) {} + +s32 AvPlayer::PostInit(const SceAvPlayerPostInitData& data) { + m_post_init_data = data; + return ORBIS_OK; +} + +s32 AvPlayer::AddSource(std::string_view path) { + if (path.empty()) { + return ORBIS_AVPLAYER_ERROR_INVALID_PARAMS; + } + if (AVPLAYER_IS_ERROR(m_state->AddSource(path, GetSourceType(path)))) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::GetStreamCount() { + if (m_state == nullptr) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + const auto res = m_state->GetStreamCount(); + if (AVPLAYER_IS_ERROR(res)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return res; +} + +s32 AvPlayer::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + if (AVPLAYER_IS_ERROR(m_state->GetStreamInfo(stream_index, info))) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::EnableStream(u32 stream_index) { + if (m_state == nullptr) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + if (!m_state->EnableStream(stream_index)) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +s32 AvPlayer::Start() { + return m_state->Start(); +} + +bool AvPlayer::GetVideoData(SceAvPlayerFrameInfo& video_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetVideoData(video_info); +} + +bool AvPlayer::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetVideoData(video_info); +} + +bool AvPlayer::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + if (m_state == nullptr) { + return false; + } + return m_state->GetAudioData(audio_info); +} + +bool AvPlayer::IsActive() { + if (m_state == nullptr) { + return false; + } + return m_state->IsActive(); +} + +u64 AvPlayer::CurrentTime() { + if (m_state == nullptr) { + return 0; + } + return m_state->CurrentTime(); +} + +s32 AvPlayer::Stop() { + if (m_state == nullptr || !m_state->Stop()) { + return ORBIS_AVPLAYER_ERROR_OPERATION_FAILED; + } + return ORBIS_OK; +} + +bool AvPlayer::SetLooping(bool is_looping) { + if (m_state == nullptr) { + return false; + } + return m_state->SetLooping(is_looping); +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_impl.h b/src/core/libraries/avplayer/avplayer_impl.h new file mode 100644 index 000000000..09989d399 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_impl.h @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" +#include "avplayer_state.h" + +#include "core/libraries/kernel/thread_management.h" + +#include + +extern "C" { +#include +#include +} + +#include +#include + +namespace Libraries::AvPlayer { + +class AvPlayer { +public: + AvPlayer(const SceAvPlayerInitData& data); + + s32 PostInit(const SceAvPlayerPostInitData& data); + s32 AddSource(std::string_view filename); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + s32 EnableStream(u32 stream_index); + s32 Start(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + bool IsActive(); + u64 CurrentTime(); + s32 Stop(); + bool SetLooping(bool is_looping); + +private: + using ScePthreadMutex = Kernel::ScePthreadMutex; + + // Memory Replacement + static void* PS4_SYSV_ABI Allocate(void* handle, u32 alignment, u32 size); + static void PS4_SYSV_ABI Deallocate(void* handle, void* memory); + static void* PS4_SYSV_ABI AllocateTexture(void* handle, u32 alignment, u32 size); + static void PS4_SYSV_ABI DeallocateTexture(void* handle, void* memory); + + // File Replacement + static int PS4_SYSV_ABI OpenFile(void* handle, const char* filename); + static int PS4_SYSV_ABI CloseFile(void* handle); + static int PS4_SYSV_ABI ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length); + static u64 PS4_SYSV_ABI SizeFile(void* handle); + + SceAvPlayerInitData StubInitData(const SceAvPlayerInitData& data); + + SceAvPlayerInitData m_init_data{}; + SceAvPlayerInitData m_init_data_original{}; + SceAvPlayerPostInitData m_post_init_data{}; + std::mutex m_file_io_mutex{}; + + std::atomic_bool m_has_source{}; + std::unique_ptr m_state{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_source.cpp b/src/core/libraries/avplayer/avplayer_source.cpp new file mode 100644 index 000000000..776d389f0 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_source.cpp @@ -0,0 +1,730 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_source.h" + +#include "avplayer_file_streamer.h" + +#include "common/singleton.h" +#include "core/file_sys/fs.h" +#include "core/libraries/kernel/time_management.h" + +#include + +extern "C" { +#include +#include +#include +#include +#include +} + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +AvPlayerSource::AvPlayerSource(AvPlayerStateCallback& state, std::string_view path, + const SceAvPlayerInitData& init_data, + SceAvPlayerSourceType source_type) + : m_state(state), m_memory_replacement(init_data.memory_replacement), + m_num_output_video_framebuffers( + std::min(std::max(2, init_data.num_output_video_framebuffers), 16)) { + AVFormatContext* context = avformat_alloc_context(); + if (init_data.file_replacement.open != nullptr) { + m_up_data_streamer = + std::make_unique(init_data.file_replacement, path); + context->pb = m_up_data_streamer->GetContext(); + ASSERT(!AVPLAYER_IS_ERROR(avformat_open_input(&context, nullptr, nullptr, nullptr))); + } else { + const auto mnt = Common::Singleton::Instance(); + const auto filepath = mnt->GetHostPath(path); + ASSERT(!AVPLAYER_IS_ERROR( + avformat_open_input(&context, filepath.string().c_str(), nullptr, nullptr))); + } + m_avformat_context = AVFormatContextPtr(context, &ReleaseAVFormatContext); +} + +AvPlayerSource::~AvPlayerSource() { + Stop(); +} + +bool AvPlayerSource::FindStreamInfo() { + if (m_avformat_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not find stream info. NULL context."); + return false; + } + if (m_avformat_context->nb_streams > 0) { + return true; + } + return avformat_find_stream_info(m_avformat_context.get(), nullptr) == 0; +} + +s32 AvPlayerSource::GetStreamCount() { + if (m_avformat_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream count. NULL context."); + return -1; + } + LOG_INFO(Lib_AvPlayer, "Stream Count: {}", m_avformat_context->nb_streams); + return m_avformat_context->nb_streams; +} + +static s32 CodecTypeToStreamType(AVMediaType codec_type) { + switch (codec_type) { + case AVMediaType::AVMEDIA_TYPE_VIDEO: + return SCE_AVPLAYER_VIDEO; + case AVMediaType::AVMEDIA_TYPE_AUDIO: + return SCE_AVPLAYER_AUDIO; + case AVMediaType::AVMEDIA_TYPE_SUBTITLE: + return SCE_AVPLAYER_TIMEDTEXT; + default: + LOG_ERROR(Lib_AvPlayer, "Unexpected AVMediaType {}", magic_enum::enum_name(codec_type)); + return -1; + } +} + +static f32 AVRationalToF32(const AVRational& rational) { + return f32(rational.num) / rational.den; +} + +s32 AvPlayerSource::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + info = {}; + if (m_avformat_context == nullptr || stream_index >= m_avformat_context->nb_streams) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info.", stream_index); + return -1; + } + const auto p_stream = m_avformat_context->streams[stream_index]; + if (p_stream == nullptr || p_stream->codecpar == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info. NULL stream.", stream_index); + return -1; + } + info.type = CodecTypeToStreamType(p_stream->codecpar->codec_type); + info.start_time = p_stream->start_time; + info.duration = p_stream->duration; + const auto p_lang_node = av_dict_get(p_stream->metadata, "language", nullptr, 0); + if (p_lang_node != nullptr) { + LOG_INFO(Lib_AvPlayer, "Stream {} language = {}", stream_index, p_lang_node->value); + } else { + LOG_WARNING(Lib_AvPlayer, "Stream {} language is unknown", stream_index); + } + switch (info.type) { + case SCE_AVPLAYER_VIDEO: + LOG_INFO(Lib_AvPlayer, "Stream {} is a video stream.", stream_index); + info.details.video.aspect_ratio = + f32(p_stream->codecpar->width) / p_stream->codecpar->height; + info.details.video.width = p_stream->codecpar->width; + info.details.video.height = p_stream->codecpar->height; + if (p_lang_node != nullptr) { + std::memcpy(info.details.video.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + case SCE_AVPLAYER_AUDIO: + LOG_INFO(Lib_AvPlayer, "Stream {} is an audio stream.", stream_index); + info.details.audio.channel_count = p_stream->codecpar->ch_layout.nb_channels; + info.details.audio.sample_rate = p_stream->codecpar->sample_rate; + info.details.audio.size = 0; // sceAvPlayerGetStreamInfo() is expected to set this to 0 + if (p_lang_node != nullptr) { + std::memcpy(info.details.audio.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + case SCE_AVPLAYER_TIMEDTEXT: + LOG_WARNING(Lib_AvPlayer, "Stream {} is a timedtext stream.", stream_index); + info.details.subs.font_size = 12; + info.details.subs.text_size = 12; + if (p_lang_node != nullptr) { + std::memcpy(info.details.subs.language_code, p_lang_node->value, + std::min(strlen(p_lang_node->value), size_t(3))); + } + break; + default: + LOG_ERROR(Lib_AvPlayer, "Stream {} type is unknown: {}.", stream_index, info.type); + return -1; + } + return 0; +} + +bool AvPlayerSource::EnableStream(u32 stream_index) { + if (m_avformat_context == nullptr || stream_index >= m_avformat_context->nb_streams) { + return false; + } + const auto stream = m_avformat_context->streams[stream_index]; + const auto decoder = avcodec_find_decoder(stream->codecpar->codec_id); + if (decoder == nullptr) { + return false; + } + switch (stream->codecpar->codec_type) { + case AVMediaType::AVMEDIA_TYPE_VIDEO: { + m_video_stream_index = stream_index; + m_video_codec_context = + AVCodecContextPtr(avcodec_alloc_context3(decoder), &ReleaseAVCodecContext); + if (avcodec_parameters_to_context(m_video_codec_context.get(), stream->codecpar) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not copy stream {} avcodec parameters to context.", + stream_index); + return false; + } + if (avcodec_open2(m_video_codec_context.get(), decoder, nullptr) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not open avcodec for video stream {}.", stream_index); + return false; + } + const auto width = m_video_codec_context->width; + const auto size = (width * m_video_codec_context->height * 3) / 2; + for (u64 index = 0; index < m_num_output_video_framebuffers; ++index) { + m_video_buffers.Push(FrameBuffer(m_memory_replacement, 0x100, size)); + } + LOG_INFO(Lib_AvPlayer, "Video stream {} enabled", stream_index); + break; + } + case AVMediaType::AVMEDIA_TYPE_AUDIO: { + m_audio_stream_index = stream_index; + m_audio_codec_context = + AVCodecContextPtr(avcodec_alloc_context3(decoder), &ReleaseAVCodecContext); + if (avcodec_parameters_to_context(m_audio_codec_context.get(), stream->codecpar) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not copy stream {} avcodec parameters to context.", + stream_index); + return false; + } + if (avcodec_open2(m_audio_codec_context.get(), decoder, nullptr) < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not open avcodec for audio stream {}.", stream_index); + return false; + } + const auto num_channels = m_audio_codec_context->ch_layout.nb_channels; + const auto align = num_channels * sizeof(u16); + const auto size = num_channels * sizeof(u16) * 1024; + for (u64 index = 0; index < 4; ++index) { + m_audio_buffers.Push(FrameBuffer(m_memory_replacement, 0x100, size)); + } + LOG_INFO(Lib_AvPlayer, "Audio stream {} enabled", stream_index); + break; + } + default: + LOG_WARNING(Lib_AvPlayer, "Unknown stream type {} for stream {}", + magic_enum::enum_name(stream->codecpar->codec_type), stream_index); + break; + } + return true; +} + +void AvPlayerSource::SetLooping(bool is_looping) { + m_is_looping = is_looping; +} + +std::optional AvPlayerSource::HasFrames(u32 num_frames) { + return m_video_packets.Size() > num_frames || m_is_eof; +} + +s32 AvPlayerSource::Start() { + std::unique_lock lock(m_state_mutex); + + if (m_audio_codec_context == nullptr && m_video_codec_context == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not start playback. NULL context."); + return -1; + } + m_demuxer_thread = std::jthread([this](std::stop_token stop) { this->DemuxerThread(stop); }); + m_video_decoder_thread = + std::jthread([this](std::stop_token stop) { this->VideoDecoderThread(stop); }); + m_audio_decoder_thread = + std::jthread([this](std::stop_token stop) { this->AudioDecoderThread(stop); }); + m_start_time = std::chrono::high_resolution_clock::now(); + return 0; +} + +bool AvPlayerSource::Stop() { + std::unique_lock lock(m_state_mutex); + + if (!HasRunningThreads()) { + LOG_WARNING(Lib_AvPlayer, "Could not stop playback: already stopped."); + return false; + } + + m_video_decoder_thread.request_stop(); + m_audio_decoder_thread.request_stop(); + m_demuxer_thread.request_stop(); + if (m_demuxer_thread.joinable()) { + m_demuxer_thread.join(); + } + if (m_video_decoder_thread.joinable()) { + m_video_decoder_thread.join(); + } + if (m_audio_decoder_thread.joinable()) { + m_audio_decoder_thread.join(); + } + if (m_current_audio_frame.has_value()) { + m_audio_buffers.Push(std::move(m_current_audio_frame.value())); + m_current_audio_frame.reset(); + } + if (m_current_video_frame.has_value()) { + m_video_buffers.Push(std::move(m_current_video_frame.value())); + m_current_video_frame.reset(); + } + m_stop_cv.Notify(); + + m_audio_packets.Clear(); + m_video_packets.Clear(); + m_audio_frames.Clear(); + m_video_frames.Clear(); + return true; +} + +bool AvPlayerSource::GetVideoData(SceAvPlayerFrameInfo& video_info) { + if (!IsActive()) { + return false; + } + + SceAvPlayerFrameInfoEx info{}; + if (!GetVideoData(info)) { + return false; + } + video_info = {}; + video_info.timestamp = u64(info.timestamp); + video_info.pData = reinterpret_cast(info.pData); + video_info.details.video.aspect_ratio = info.details.video.aspect_ratio; + video_info.details.video.width = info.details.video.width; + video_info.details.video.height = info.details.video.height; + return true; +} + +static void CopyNV12Data(u8* dst, const AVFrame& src) { + std::memcpy(dst, src.data[0], src.width * src.height); + std::memcpy(dst + src.width * src.height, src.data[1], (src.width * src.height) / 2); +} + +bool AvPlayerSource::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + if (!IsActive()) { + return false; + } + + m_video_frames_cv.Wait([this] { return m_video_frames.Size() != 0 || m_is_eof; }); + + auto frame = m_video_frames.Pop(); + if (!frame.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could get video frame. EOF reached."); + return false; + } + + { + using namespace std::chrono; + auto elapsed_time = + duration_cast(high_resolution_clock::now() - m_start_time).count(); + if (elapsed_time < frame->info.timestamp) { + if (m_stop_cv.WaitFor(milliseconds(frame->info.timestamp - elapsed_time), + [&] { return elapsed_time >= frame->info.timestamp; })) { + return false; + } + } + } + + // return the buffer to the queue + if (m_current_video_frame.has_value()) { + m_video_buffers.Push(std::move(m_current_video_frame.value())); + m_video_buffers_cv.Notify(); + } + m_current_video_frame = std::move(frame->buffer); + video_info = frame->info; + return true; +} + +bool AvPlayerSource::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + if (!IsActive()) { + return false; + } + + m_audio_frames_cv.Wait([this] { return m_audio_frames.Size() != 0 || m_is_eof; }); + + auto frame = m_audio_frames.Pop(); + if (!frame.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could get audio frame. EOF reached."); + return false; + } + + { + using namespace std::chrono; + auto elapsed_time = + duration_cast(high_resolution_clock::now() - m_start_time).count(); + if (elapsed_time < frame->info.timestamp) { + if (m_stop_cv.WaitFor(milliseconds(frame->info.timestamp - elapsed_time), + [&] { return elapsed_time >= frame->info.timestamp; })) { + return false; + } + } + } + + // return the buffer to the queue + if (m_current_audio_frame.has_value()) { + m_audio_buffers.Push(std::move(m_current_audio_frame.value())); + m_audio_buffers_cv.Notify(); + } + m_current_audio_frame = std::move(frame->buffer); + + audio_info = {}; + audio_info.timestamp = frame->info.timestamp; + audio_info.pData = reinterpret_cast(frame->info.pData); + audio_info.details.audio.size = frame->info.details.audio.size; + audio_info.details.audio.channel_count = frame->info.details.audio.channel_count; + return true; +} + +u64 AvPlayerSource::CurrentTime() { + using namespace std::chrono; + return duration_cast(high_resolution_clock::now() - m_start_time).count(); +} + +bool AvPlayerSource::IsActive() { + return !m_is_eof || m_audio_packets.Size() != 0 || m_video_packets.Size() != 0 || + m_video_frames.Size() != 0 || m_audio_frames.Size() != 0; +} + +void AvPlayerSource::ReleaseAVPacket(AVPacket* packet) { + if (packet != nullptr) { + av_packet_free(&packet); + } +} + +void AvPlayerSource::ReleaseAVFrame(AVFrame* frame) { + if (frame != nullptr) { + av_frame_free(&frame); + } +} + +void AvPlayerSource::ReleaseAVCodecContext(AVCodecContext* context) { + if (context != nullptr) { + avcodec_free_context(&context); + } +} + +void AvPlayerSource::ReleaseSWRContext(SwrContext* context) { + if (context != nullptr) { + swr_free(&context); + } +} + +void AvPlayerSource::ReleaseSWSContext(SwsContext* context) { + if (context != nullptr) { + sws_freeContext(context); + } +} + +void AvPlayerSource::ReleaseAVFormatContext(AVFormatContext* context) { + if (context != nullptr) { + avformat_close_input(&context); + } +} + +void AvPlayerSource::DemuxerThread(std::stop_token stop) { + using namespace std::chrono; + if (!m_audio_stream_index.has_value() && !m_video_stream_index.has_value()) { + LOG_WARNING(Lib_AvPlayer, "Could not start DEMUXER thread. No streams enabled."); + return; + } + LOG_INFO(Lib_AvPlayer, "Demuxer Thread started"); + + while (!stop.stop_requested()) { + if (m_video_packets.Size() > 30 && m_audio_packets.Size() > 8) { + std::this_thread::sleep_for(milliseconds(5)); + continue; + } + AVPacketPtr up_packet(av_packet_alloc(), &ReleaseAVPacket); + const auto res = av_read_frame(m_avformat_context.get(), up_packet.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + if (m_is_looping) { + LOG_INFO(Lib_AvPlayer, "EOF reached in demuxer. Looping the source..."); + avio_seek(m_avformat_context->pb, 0, SEEK_SET); + if (m_video_stream_index.has_value()) { + const auto index = m_video_stream_index.value(); + const auto stream = m_avformat_context->streams[index]; + avformat_seek_file(m_avformat_context.get(), index, 0, 0, stream->duration, + 0); + } + if (m_audio_stream_index.has_value()) { + const auto index = m_audio_stream_index.value(); + const auto stream = m_avformat_context->streams[index]; + avformat_seek_file(m_avformat_context.get(), index, 0, 0, stream->duration, + 0); + } + continue; + } else { + LOG_INFO(Lib_AvPlayer, "EOF reached in demuxer. Exiting."); + break; + } + } else { + LOG_ERROR(Lib_AvPlayer, "Could not read AV frame: error = {}", res); + m_state.OnError(); + return; + } + break; + } + if (up_packet->stream_index == m_video_stream_index) { + m_video_packets.Push(std::move(up_packet)); + m_video_packets_cv.Notify(); + } else if (up_packet->stream_index == m_audio_stream_index) { + m_audio_packets.Push(std::move(up_packet)); + m_audio_packets_cv.Notify(); + } + } + + m_is_eof = true; + + m_video_packets_cv.Notify(); + m_audio_packets_cv.Notify(); + m_video_frames_cv.Notify(); + m_audio_frames_cv.Notify(); + + if (m_video_decoder_thread.joinable()) { + m_video_decoder_thread.join(); + } + if (m_audio_decoder_thread.joinable()) { + m_audio_decoder_thread.join(); + } + m_state.OnEOF(); + + LOG_INFO(Lib_AvPlayer, "Demuxer Thread exited normaly"); +} + +AvPlayerSource::AVFramePtr AvPlayerSource::ConvertVideoFrame(const AVFrame& frame) { + auto nv12_frame = AVFramePtr{av_frame_alloc(), &ReleaseAVFrame}; + nv12_frame->pts = frame.pts; + nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; + nv12_frame->format = AV_PIX_FMT_NV12; + nv12_frame->width = frame.width; + nv12_frame->height = frame.height; + nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio; + + av_frame_get_buffer(nv12_frame.get(), 0); + + if (m_sws_context == nullptr) { + m_sws_context = + SWSContextPtr(sws_getContext(frame.width, frame.height, AVPixelFormat(frame.format), + frame.width, frame.height, AV_PIX_FMT_NV12, + SWS_FAST_BILINEAR, nullptr, nullptr, nullptr), + &ReleaseSWSContext); + } + const auto res = sws_scale(m_sws_context.get(), frame.data, frame.linesize, 0, frame.height, + nv12_frame->data, nv12_frame->linesize); + if (res < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not convert to NV12: {}", av_err2str(res)); + return AVFramePtr{nullptr, &ReleaseAVFrame}; + } + return nv12_frame; +} + +Frame AvPlayerSource::PrepareVideoFrame(FrameBuffer buffer, const AVFrame& frame) { + ASSERT(frame.format == AV_PIX_FMT_NV12); + + auto p_buffer = buffer.GetBuffer(); + CopyNV12Data(p_buffer, frame); + + const auto pkt_dts = u64(frame.pkt_dts) * 1000; + const auto stream = m_avformat_context->streams[m_video_stream_index.value()]; + const auto time_base = stream->time_base; + const auto den = time_base.den; + const auto num = time_base.num; + const auto timestamp = (num != 0 && den > 1) ? (pkt_dts * num) / den : pkt_dts; + + return Frame{ + .buffer = std::move(buffer), + .info = + { + .pData = p_buffer, + .timestamp = timestamp, + .details = + { + .video = + { + .width = u32(frame.width), + .height = u32(frame.height), + .aspect_ratio = AVRationalToF32(frame.sample_aspect_ratio), + .pitch = u32(frame.linesize[0]), + .luma_bit_depth = 8, + .chroma_bit_depth = 8, + }, + }, + }, + }; +} + +void AvPlayerSource::VideoDecoderThread(std::stop_token stop) { + using namespace std::chrono; + LOG_INFO(Lib_AvPlayer, "Video Decoder Thread started"); + while ((!m_is_eof || m_video_packets.Size() != 0) && !stop.stop_requested()) { + if (!m_video_packets_cv.Wait(stop, + [this] { return m_video_packets.Size() != 0 || m_is_eof; })) { + continue; + } + const auto packet = m_video_packets.Pop(); + if (!packet.has_value()) { + continue; + } + + auto res = avcodec_send_packet(m_video_codec_context.get(), packet->get()); + if (res < 0 && res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, "Could not send packet to the video codec. Error = {}", + av_err2str(res)); + return; + } + while (res >= 0) { + if (!m_video_buffers_cv.Wait(stop, [this] { return m_video_buffers.Size() != 0; })) { + break; + } + if (m_video_buffers.Size() == 0) { + continue; + } + auto up_frame = AVFramePtr(av_frame_alloc(), &ReleaseAVFrame); + res = avcodec_receive_frame(m_video_codec_context.get(), up_frame.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + LOG_INFO(Lib_AvPlayer, "EOF reached in video decoder"); + return; + } else if (res != AVERROR(EAGAIN)) { + LOG_ERROR(Lib_AvPlayer, + "Could not receive frame from the video codec. Error = {}", + av_err2str(res)); + m_state.OnError(); + return; + } + } else { + auto buffer = m_video_buffers.Pop(); + if (!buffer.has_value()) { + // Video buffers queue was cleared. This means that player was stopped. + break; + } + if (up_frame->format != AV_PIX_FMT_NV12) { + const auto nv12_frame = ConvertVideoFrame(*up_frame); + m_video_frames.Push(PrepareVideoFrame(std::move(buffer.value()), *nv12_frame)); + } else { + m_video_frames.Push(PrepareVideoFrame(std::move(buffer.value()), *up_frame)); + } + m_video_frames_cv.Notify(); + } + } + } + + LOG_INFO(Lib_AvPlayer, "Video Decoder Thread exited normaly"); +} + +AvPlayerSource::AVFramePtr AvPlayerSource::ConvertAudioFrame(const AVFrame& frame) { + auto pcm16_frame = AVFramePtr{av_frame_alloc(), &ReleaseAVFrame}; + pcm16_frame->pts = frame.pts; + pcm16_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; + pcm16_frame->format = AV_SAMPLE_FMT_S16; + pcm16_frame->ch_layout = frame.ch_layout; + pcm16_frame->sample_rate = frame.sample_rate; + + if (m_swr_context == nullptr) { + SwrContext* swr_context = nullptr; + AVChannelLayout in_ch_layout = frame.ch_layout; + AVChannelLayout out_ch_layout = frame.ch_layout; + swr_alloc_set_opts2(&swr_context, &out_ch_layout, AV_SAMPLE_FMT_S16, frame.sample_rate, + &in_ch_layout, AVSampleFormat(frame.format), frame.sample_rate, 0, + nullptr); + m_swr_context = SWRContextPtr(swr_context, &ReleaseSWRContext); + swr_init(m_swr_context.get()); + } + const auto res = swr_convert_frame(m_swr_context.get(), pcm16_frame.get(), &frame); + if (res < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not convert to NV12: {}", av_err2str(res)); + return AVFramePtr{nullptr, &ReleaseAVFrame}; + } + return pcm16_frame; +} + +Frame AvPlayerSource::PrepareAudioFrame(FrameBuffer buffer, const AVFrame& frame) { + ASSERT(frame.format == AV_SAMPLE_FMT_S16); + ASSERT(frame.nb_samples <= 1024); + + auto p_buffer = buffer.GetBuffer(); + const auto size = frame.ch_layout.nb_channels * frame.nb_samples * sizeof(u16); + std::memcpy(p_buffer, frame.data[0], size); + + const auto pkt_dts = u64(frame.pkt_dts) * 1000; + const auto stream = m_avformat_context->streams[m_audio_stream_index.value()]; + const auto time_base = stream->time_base; + const auto den = time_base.den; + const auto num = time_base.num; + const auto timestamp = (num != 0 && den > 1) ? (pkt_dts * num) / den : pkt_dts; + + return Frame{ + .buffer = std::move(buffer), + .info = + { + .pData = p_buffer, + .timestamp = timestamp, + .details = + { + .audio = + { + .channel_count = u16(frame.ch_layout.nb_channels), + .size = u32(size), + }, + }, + }, + }; +} + +void AvPlayerSource::AudioDecoderThread(std::stop_token stop) { + using namespace std::chrono; + LOG_INFO(Lib_AvPlayer, "Audio Decoder Thread started"); + while ((!m_is_eof || m_audio_packets.Size() != 0) && !stop.stop_requested()) { + if (!m_audio_packets_cv.Wait(stop, + [this] { return m_audio_packets.Size() != 0 || m_is_eof; })) { + continue; + } + const auto packet = m_audio_packets.Pop(); + if (!packet.has_value()) { + continue; + } + auto res = avcodec_send_packet(m_audio_codec_context.get(), packet->get()); + if (res < 0 && res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, "Could not send packet to the audio codec. Error = {}", + av_err2str(res)); + return; + } + while (res >= 0) { + if (!m_audio_buffers_cv.Wait(stop, [this] { return m_audio_buffers.Size() != 0; })) { + break; + } + if (m_audio_buffers.Size() == 0) { + continue; + } + + auto up_frame = AVFramePtr(av_frame_alloc(), &ReleaseAVFrame); + res = avcodec_receive_frame(m_audio_codec_context.get(), up_frame.get()); + if (res < 0) { + if (res == AVERROR_EOF) { + LOG_INFO(Lib_AvPlayer, "EOF reached in audio decoder"); + return; + } else if (res != AVERROR(EAGAIN)) { + m_state.OnError(); + LOG_ERROR(Lib_AvPlayer, + "Could not receive frame from the audio codec. Error = {}", + av_err2str(res)); + return; + } + } else { + auto buffer = m_audio_buffers.Pop(); + if (!buffer.has_value()) { + // Audio buffers queue was cleared. This means that player was stopped. + break; + } + if (up_frame->format != AV_SAMPLE_FMT_S16) { + const auto pcm16_frame = ConvertAudioFrame(*up_frame); + m_audio_frames.Push(PrepareAudioFrame(std::move(buffer.value()), *pcm16_frame)); + } else { + m_audio_frames.Push(PrepareAudioFrame(std::move(buffer.value()), *up_frame)); + } + m_audio_frames_cv.Notify(); + } + } + } + + LOG_INFO(Lib_AvPlayer, "Audio Decoder Thread exited normaly"); +} + +bool AvPlayerSource::HasRunningThreads() const { + return m_demuxer_thread.joinable() || m_video_decoder_thread.joinable() || + m_audio_decoder_thread.joinable(); +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_source.h b/src/core/libraries/avplayer/avplayer_source.h new file mode 100644 index 000000000..7144e7ee4 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_source.h @@ -0,0 +1,219 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_common.h" +#include "avplayer_data_streamer.h" + +#include "common/polyfill_thread.h" +#include "common/types.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include +#include +#include +#include + +struct AVCodecContext; +struct AVFormatContext; +struct AVFrame; +struct AVIOContext; +struct AVPacket; +struct SwrContext; +struct SwsContext; + +namespace Libraries::AvPlayer { + +class AvPlayerStateCallback { +public: + virtual ~AvPlayerStateCallback() = default; + + virtual void OnWarning(u32 id) = 0; + virtual void OnError() = 0; + virtual void OnEOF() = 0; +}; + +class FrameBuffer { +public: + FrameBuffer(const SceAvPlayerMemAllocator& memory_replacement, u32 align, u32 size) noexcept + : m_memory_replacement(memory_replacement), + m_data(Allocate(memory_replacement, align, size)) { + ASSERT_MSG(m_data, "Could not allocated frame buffer."); + } + + ~FrameBuffer() { + if (m_data != nullptr) { + Deallocate(m_memory_replacement, m_data); + m_data = {}; + } + } + + FrameBuffer(const FrameBuffer&) noexcept = delete; + FrameBuffer& operator=(const FrameBuffer&) noexcept = delete; + + FrameBuffer(FrameBuffer&& r) noexcept + : m_memory_replacement(r.m_memory_replacement), m_data(r.m_data) { + r.m_data = nullptr; + }; + + FrameBuffer& operator=(FrameBuffer&& r) noexcept { + std::swap(m_data, r.m_data); + return *this; + } + + u8* GetBuffer() const noexcept { + return m_data; + } + +private: + static u8* Allocate(const SceAvPlayerMemAllocator& memory_replacement, u32 align, u32 size) { + return reinterpret_cast( + memory_replacement.allocate(memory_replacement.object_ptr, align, size)); + } + + static void Deallocate(const SceAvPlayerMemAllocator& memory_replacement, void* ptr) { + memory_replacement.deallocate(memory_replacement.object_ptr, ptr); + } + + const SceAvPlayerMemAllocator& m_memory_replacement; + u8* m_data = nullptr; +}; + +struct Frame { + FrameBuffer buffer; + SceAvPlayerFrameInfoEx info; +}; + +class EventCV { +public: + template + void Wait(Pred pred) { + std::unique_lock lock(m_mutex); + m_cv.wait(lock, std::move(pred)); + } + + template + bool Wait(std::stop_token stop, Pred pred) { + std::unique_lock lock(m_mutex); + return m_cv.wait(lock, std::move(stop), std::move(pred)); + } + + template + bool WaitFor(std::chrono::duration timeout, Pred pred) { + std::unique_lock lock(m_mutex); + return m_cv.wait_for(lock, timeout, std::move(pred)); + } + + void Notify() { + std::unique_lock lock(m_mutex); + m_cv.notify_all(); + } + +private: + std::mutex m_mutex{}; + std::condition_variable_any m_cv{}; +}; + +class AvPlayerSource { +public: + AvPlayerSource(AvPlayerStateCallback& state, std::string_view path, + const SceAvPlayerInitData& init_data, SceAvPlayerSourceType source_type); + ~AvPlayerSource(); + + bool FindStreamInfo(); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + bool EnableStream(u32 stream_index); + void SetLooping(bool is_looping); + std::optional HasFrames(u32 num_frames); + s32 Start(); + bool Stop(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + u64 CurrentTime(); + bool IsActive(); + +private: + using ScePthread = Kernel::ScePthread; + + static void ReleaseAVPacket(AVPacket* packet); + static void ReleaseAVFrame(AVFrame* frame); + static void ReleaseAVCodecContext(AVCodecContext* context); + static void ReleaseSWRContext(SwrContext* context); + static void ReleaseSWSContext(SwsContext* context); + static void ReleaseAVFormatContext(AVFormatContext* context); + + using AVPacketPtr = std::unique_ptr; + using AVFramePtr = std::unique_ptr; + using AVCodecContextPtr = std::unique_ptr; + using SWRContextPtr = std::unique_ptr; + using SWSContextPtr = std::unique_ptr; + using AVFormatContextPtr = std::unique_ptr; + + void DemuxerThread(std::stop_token stop); + void VideoDecoderThread(std::stop_token stop); + void AudioDecoderThread(std::stop_token stop); + + bool HasRunningThreads() const; + + AVFramePtr ConvertAudioFrame(const AVFrame& frame); + AVFramePtr ConvertVideoFrame(const AVFrame& frame); + + Frame PrepareAudioFrame(FrameBuffer buffer, const AVFrame& frame); + Frame PrepareVideoFrame(FrameBuffer buffer, const AVFrame& frame); + + AvPlayerStateCallback& m_state; + + SceAvPlayerMemAllocator m_memory_replacement{}; + u32 m_num_output_video_framebuffers{}; + + std::atomic_bool m_is_looping = false; + std::atomic_bool m_is_eof = false; + + std::unique_ptr m_up_data_streamer; + + AvPlayerQueue m_audio_buffers; + AvPlayerQueue m_video_buffers; + + AvPlayerQueue m_audio_packets; + AvPlayerQueue m_video_packets; + + AvPlayerQueue m_audio_frames; + AvPlayerQueue m_video_frames; + + std::optional m_current_video_frame; + std::optional m_current_audio_frame; + + std::optional m_video_stream_index{}; + std::optional m_audio_stream_index{}; + + EventCV m_audio_packets_cv{}; + EventCV m_audio_frames_cv{}; + EventCV m_audio_buffers_cv{}; + + EventCV m_video_packets_cv{}; + EventCV m_video_frames_cv{}; + EventCV m_video_buffers_cv{}; + + EventCV m_stop_cv{}; + + std::mutex m_state_mutex{}; + std::jthread m_demuxer_thread{}; + std::jthread m_video_decoder_thread{}; + std::jthread m_audio_decoder_thread{}; + + AVFormatContextPtr m_avformat_context{nullptr, &ReleaseAVFormatContext}; + AVCodecContextPtr m_video_codec_context{nullptr, &ReleaseAVCodecContext}; + AVCodecContextPtr m_audio_codec_context{nullptr, &ReleaseAVCodecContext}; + SWRContextPtr m_swr_context{nullptr, &ReleaseSWRContext}; + SWSContextPtr m_sws_context{nullptr, &ReleaseSWSContext}; + + std::chrono::high_resolution_clock::time_point m_start_time{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp new file mode 100644 index 000000000..884cd9408 --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -0,0 +1,493 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "avplayer_file_streamer.h" +#include "avplayer_source.h" +#include "avplayer_state.h" + +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/time_management.h" + +#include + +namespace Libraries::AvPlayer { + +using namespace Kernel; + +void PS4_SYSV_ABI AvPlayerState::AutoPlayEventCallback(void* opaque, s32 event_id, s32 source_id, + void* event_data) { + auto const self = reinterpret_cast(opaque); + + if (event_id == SCE_AVPLAYER_STATE_READY) { + s32 video_stream_index = -1; + s32 audio_stream_index = -1; + s32 timedtext_stream_index = -1; + const s32 stream_count = self->GetStreamCount(); + if (AVPLAYER_IS_ERROR(stream_count)) { + return; + } + if (stream_count == 0) { + self->Stop(); + return; + } + for (u32 stream_index = 0; stream_index < stream_count; ++stream_index) { + SceAvPlayerStreamInfo info{}; + self->GetStreamInfo(stream_index, info); + + const std::string_view default_language( + reinterpret_cast(self->m_default_language)); + switch (info.type) { + case SCE_AVPLAYER_VIDEO: + if (video_stream_index == -1) { + video_stream_index = stream_index; + } + if (!default_language.empty() && + default_language == reinterpret_cast(info.details.video.language_code)) { + video_stream_index = stream_index; + } + break; + case SCE_AVPLAYER_AUDIO: + if (audio_stream_index == -1) { + audio_stream_index = stream_index; + } + if (!default_language.empty() && + default_language == reinterpret_cast(info.details.video.language_code)) { + audio_stream_index = stream_index; + } + break; + case SCE_AVPLAYER_TIMEDTEXT: + if (default_language.empty()) { + timedtext_stream_index = stream_index; + break; + } + if (default_language == reinterpret_cast(info.details.video.language_code)) { + timedtext_stream_index = stream_index; + } + break; + } + } + + if (video_stream_index != -1) { + self->EnableStream(video_stream_index); + } + if (audio_stream_index != -1) { + self->EnableStream(audio_stream_index); + } + if (timedtext_stream_index != -1) { + self->EnableStream(timedtext_stream_index); + } + self->Start(); + return; + } + + // Pass other events to the game + const auto callback = self->m_event_replacement.event_callback; + const auto ptr = self->m_event_replacement.object_ptr; + if (callback != nullptr) { + callback(ptr, event_id, 0, event_data); + } +} + +// Called inside GAME thread +AvPlayerState::AvPlayerState(const SceAvPlayerInitData& init_data) + : m_init_data(init_data), m_event_replacement(init_data.event_replacement) { + if (m_event_replacement.event_callback == nullptr || init_data.auto_start) { + m_auto_start = true; + m_init_data.event_replacement.event_callback = &AvPlayerState::AutoPlayEventCallback; + m_init_data.event_replacement.object_ptr = this; + } + if (init_data.default_language != nullptr) { + std::memcpy(m_default_language, init_data.default_language, sizeof(m_default_language)); + } + SetState(AvState::Initial); + StartControllerThread(); +} + +AvPlayerState::~AvPlayerState() { + { + std::unique_lock lock(m_source_mutex); + m_up_source.reset(); + } + if (m_controller_thread.joinable()) { + m_controller_thread.request_stop(); + m_controller_thread.join(); + } + m_event_queue.Clear(); +} + +// Called inside GAME thread +s32 AvPlayerState::AddSource(std::string_view path, SceAvPlayerSourceType source_type) { + if (path.empty()) { + LOG_ERROR(Lib_AvPlayer, "File path is empty."); + return -1; + } + + { + std::unique_lock lock(m_source_mutex); + if (m_up_source != nullptr) { + LOG_ERROR(Lib_AvPlayer, "Only one source is supported."); + return -1; + } + + m_up_source = std::make_unique(*this, path, m_init_data, source_type); + } + AddSourceEvent(); + return 0; +} + +// Called inside GAME thread +s32 AvPlayerState::GetStreamCount() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream count. No source."); + return -1; + } + return m_up_source->GetStreamCount(); +} + +// Called inside GAME thread +s32 AvPlayerState::GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get stream {} info. No source.", stream_index); + return -1; + } + return m_up_source->GetStreamInfo(stream_index, info); +} + +// Called inside GAME thread +s32 AvPlayerState::Start() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr || m_up_source->Start() < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not start playback."); + return -1; + } + SetState(AvState::Play); + OnPlaybackStateChanged(AvState::Play); + return 0; +} + +void AvPlayerState::AvControllerThread(std::stop_token stop) { + using std::chrono::milliseconds; + + while (!stop.stop_requested()) { + if (m_event_queue.Size() != 0) { + ProcessEvent(); + continue; + } + std::this_thread::sleep_for(milliseconds(5)); + UpdateBufferingState(); + } +} + +// Called inside GAME thread +void AvPlayerState::AddSourceEvent() { + SetState(AvState::AddingSource); + m_event_queue.Push(AvPlayerEvent{ + .event = AvEventType::AddSource, + }); +} + +void AvPlayerState::WarningEvent(s32 id) { + m_event_queue.Push(AvPlayerEvent{ + .event = AvEventType::WarningId, + .payload = + { + .error = id, + }, + }); +} + +// Called inside GAME thread +void AvPlayerState::StartControllerThread() { + m_controller_thread = + std::jthread([this](std::stop_token stop) { this->AvControllerThread(stop); }); +} + +// Called inside GAME thread +bool AvPlayerState::EnableStream(u32 stream_index) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->EnableStream(stream_index); +} + +// Called inside GAME thread +bool AvPlayerState::Stop() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr || m_current_state == AvState::Stop) { + return false; + } + if (!SetState(AvState::Stop)) { + return false; + } + OnPlaybackStateChanged(AvState::Stop); + return m_up_source->Stop(); +} + +bool AvPlayerState::GetVideoData(SceAvPlayerFrameInfo& video_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetVideoData(video_info); +} + +bool AvPlayerState::GetVideoData(SceAvPlayerFrameInfoEx& video_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetVideoData(video_info); +} + +bool AvPlayerState::GetAudioData(SceAvPlayerFrameInfo& audio_info) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_up_source->GetAudioData(audio_info); +} + +bool AvPlayerState::IsActive() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + return false; + } + return m_current_state != AvState::Stop && m_current_state != AvState::Error && + m_current_state != AvState::EndOfFile && m_up_source->IsActive(); +} + +u64 AvPlayerState::CurrentTime() { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not get current time. No source."); + return 0; + } + return m_up_source->CurrentTime(); +} + +bool AvPlayerState::SetLooping(bool is_looping) { + std::shared_lock lock(m_source_mutex); + if (m_up_source == nullptr) { + LOG_ERROR(Lib_AvPlayer, "Could not set loop flag. No source."); + return false; + } + m_up_source->SetLooping(is_looping); + return true; +} + +// May be called from different threads +void AvPlayerState::OnWarning(u32 id) { + // Forward to CONTROLLER thread + WarningEvent(id); +} + +void AvPlayerState::OnError() { + SetState(AvState::Error); + OnPlaybackStateChanged(AvState::Error); +} + +void AvPlayerState::OnEOF() { + SetState(AvState::EndOfFile); +} + +// Called inside CONTROLLER thread +void AvPlayerState::OnPlaybackStateChanged(AvState state) { + switch (state) { + case AvState::Ready: { + EmitEvent(SCE_AVPLAYER_STATE_READY); + break; + } + case AvState::Play: { + EmitEvent(SCE_AVPLAYER_STATE_PLAY); + break; + } + case AvState::Stop: { + EmitEvent(SCE_AVPLAYER_STATE_STOP); + break; + } + case AvState::Pause: { + EmitEvent(SCE_AVPLAYER_STATE_PAUSE); + break; + } + case AvState::Buffering: { + EmitEvent(SCE_AVPLAYER_STATE_BUFFERING); + break; + } + default: + break; + } +} + +// Called inside CONTROLLER and GAME threads +bool AvPlayerState::SetState(AvState state) { + std::lock_guard guard(m_state_machine_mutex); + + if (!IsStateTransitionValid(state)) { + LOG_ERROR(Lib_AvPlayer, "Invalid state transition: {} -> {}", + magic_enum::enum_name(m_current_state.load()), magic_enum::enum_name(state)); + return false; + } + m_previous_state.store(m_current_state); + m_current_state.store(state); + return true; +} + +// Called inside CONTROLLER thread +std::optional AvPlayerState::OnBufferingCheckEvent(u32 num_frames) { + std::shared_lock lock(m_source_mutex); + if (!m_up_source) { + return std::nullopt; + } + return m_up_source->HasFrames(num_frames); +} + +// Called inside CONTROLLER thread +void AvPlayerState::EmitEvent(SceAvPlayerEvents event_id, void* event_data) { + LOG_INFO(Lib_AvPlayer, "Sending event to the game: id = {}", magic_enum::enum_name(event_id)); + const auto callback = m_init_data.event_replacement.event_callback; + if (callback) { + const auto ptr = m_init_data.event_replacement.object_ptr; + callback(ptr, event_id, 0, event_data); + } +} + +// Called inside CONTROLLER thread +void AvPlayerState::ProcessEvent() { + if (m_current_state == AvState::Jump) { + return; + } + + std::lock_guard guard(m_event_handler_mutex); + + auto event = m_event_queue.Pop(); + if (!event.has_value()) { + return; + } + switch (event->event) { + case AvEventType::WarningId: { + OnWarning(event->payload.error); + break; + } + case AvEventType::RevertState: { + SetState(m_previous_state.load()); + break; + } + case AvEventType::AddSource: { + std::shared_lock lock(m_source_mutex); + if (m_up_source->FindStreamInfo()) { + SetState(AvState::Ready); + OnPlaybackStateChanged(AvState::Ready); + } else { + OnWarning(ORBIS_AVPLAYER_ERROR_NOT_SUPPORTED); + SetState(AvState::Error); + } + break; + } + case AvEventType::Error: { + OnWarning(event->payload.error); + SetState(AvState::Error); + break; + } + default: + break; + } +} + +// Called inside CONTROLLER thread +void AvPlayerState::UpdateBufferingState() { + if (m_current_state == AvState::Buffering) { + const auto has_frames = OnBufferingCheckEvent(10); + if (!has_frames.has_value()) { + return; + } + if (has_frames.value()) { + const auto state = + m_previous_state >= AvState::C0x0B ? m_previous_state.load() : AvState::Play; + SetState(state); + OnPlaybackStateChanged(state); + } + } else if (m_current_state == AvState::Play) { + const auto has_frames = OnBufferingCheckEvent(0); + if (!has_frames.has_value()) { + return; + } + if (!has_frames.value()) { + SetState(AvState::Buffering); + OnPlaybackStateChanged(AvState::Buffering); + } + } +} + +bool AvPlayerState::IsStateTransitionValid(AvState state) { + switch (state) { + case AvState::Play: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Pause: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Jump: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::TrickMode: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::TrickMode: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + // case AvState::C0x08: + case AvState::Jump: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + case AvState::Buffering: { + switch (m_current_state.load()) { + case AvState::Stop: + case AvState::EndOfFile: + case AvState::Pause: + // case AvState::C0x08: + case AvState::Starting: + case AvState::Error: + return false; + default: + return true; + } + } + default: + return true; + } +} + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/avplayer/avplayer_state.h b/src/core/libraries/avplayer/avplayer_state.h new file mode 100644 index 000000000..ff80b6cea --- /dev/null +++ b/src/core/libraries/avplayer/avplayer_state.h @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "avplayer.h" +#include "avplayer_data_streamer.h" +#include "avplayer_source.h" + +#include "common/polyfill_thread.h" +#include "core/libraries/kernel/thread_management.h" + +#include +#include +#include + +namespace Libraries::AvPlayer { + +class Stream; +class AvDecoder; + +class AvPlayerState : public AvPlayerStateCallback { +public: + AvPlayerState(const SceAvPlayerInitData& init_data); + ~AvPlayerState(); + + s32 AddSource(std::string_view filename, SceAvPlayerSourceType source_type); + s32 GetStreamCount(); + s32 GetStreamInfo(u32 stream_index, SceAvPlayerStreamInfo& info); + bool EnableStream(u32 stream_index); + s32 Start(); + bool Stop(); + bool GetAudioData(SceAvPlayerFrameInfo& audio_info); + bool GetVideoData(SceAvPlayerFrameInfo& video_info); + bool GetVideoData(SceAvPlayerFrameInfoEx& video_info); + bool IsActive(); + u64 CurrentTime(); + bool SetLooping(bool is_looping); + +private: + using ScePthreadMutex = Kernel::ScePthreadMutex; + using ScePthread = Kernel::ScePthread; + + // Event Replacement + static void PS4_SYSV_ABI AutoPlayEventCallback(void* handle, s32 event_id, s32 source_id, + void* event_data); + + void OnWarning(u32 id) override; + void OnError() override; + void OnEOF() override; + + void OnPlaybackStateChanged(AvState state); + std::optional OnBufferingCheckEvent(u32 num_frames); + + void EmitEvent(SceAvPlayerEvents event_id, void* event_data = nullptr); + bool SetState(AvState state); + + void AvControllerThread(std::stop_token stop); + + void AddSourceEvent(); + void WarningEvent(s32 id); + + void StartControllerThread(); + void ProcessEvent(); + void UpdateBufferingState(); + bool IsStateTransitionValid(AvState state); + + std::unique_ptr m_up_source; + + SceAvPlayerInitData m_init_data{}; + SceAvPlayerEventReplacement m_event_replacement{}; + bool m_auto_start{}; + u8 m_default_language[4]{}; + + std::atomic m_current_state; + std::atomic m_previous_state; + u32 m_thread_priority; + u32 m_thread_affinity; + std::atomic_uint32_t m_some_event_result{}; + + std::shared_mutex m_source_mutex{}; + std::mutex m_state_machine_mutex{}; + std::mutex m_event_handler_mutex{}; + std::jthread m_controller_thread{}; + AvPlayerQueue m_event_queue{}; +}; + +} // namespace Libraries::AvPlayer diff --git a/src/core/libraries/error_codes.h b/src/core/libraries/error_codes.h index 63016213d..74aeef674 100644 --- a/src/core/libraries/error_codes.h +++ b/src/core/libraries/error_codes.h @@ -233,6 +233,9 @@ constexpr int SCE_KERNEL_ERROR_ESDKVERSION = 0x80020063; constexpr int SCE_KERNEL_ERROR_ESTART = 0x80020064; constexpr int SCE_KERNEL_ERROR_ESTOP = 0x80020065; +// libSceRandom error codes +constexpr int SCE_RANDOM_ERROR_INVALID = 0x817C0016; + // videoOut constexpr int SCE_VIDEO_OUT_ERROR_INVALID_VALUE = 0x80290001; // invalid argument constexpr int SCE_VIDEO_OUT_ERROR_INVALID_ADDRESS = 0x80290002; // invalid addresses @@ -454,5 +457,18 @@ constexpr int ORBIS_NP_TROPHY_ERROR_HANDLE_EXCEEDS_MAX = 0x80551624; constexpr int ORBIS_NP_TROPHY_ERROR_CONTEXT_ALREADY_EXISTS = 0x80551613; constexpr int ORBIS_NP_TROPHY_ERROR_CONTEXT_EXCEEDS_MAX = 0x80551622; +// AvPlayer library +constexpr int ORBIS_AVPLAYER_ERROR_INVALID_PARAMS = 0x806A0001; +constexpr int ORBIS_AVPLAYER_ERROR_OPERATION_FAILED = 0x806A0002; +constexpr int ORBIS_AVPLAYER_ERROR_NO_MEMORY = 0x806A0003; +constexpr int ORBIS_AVPLAYER_ERROR_NOT_SUPPORTED = 0x806A0004; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_FILE_NONINTERLEAVED = 0x806A00A0; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_LOOPING_BACK = 0x806A00A1; +constexpr int ORBIS_AVPLAYER_ERROR_WAR_JUMP_COMPLETE = 0x806A00A3; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_MARLIN_ENCRY = 0x806A00B0; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_PLAYREADY_ENCRY = 0x806A00B4; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_AES_ENCRY = 0x806A00B5; +constexpr int ORBIS_AVPLAYER_ERROR_INFO_OTHER_ENCRY = 0x806A00BF; + // AppContent library constexpr int ORBIS_APP_CONTENT_ERROR_PARAMETER = 0x80D90002; diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 866a96989..c2ee6d592 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -20,13 +20,12 @@ extern Frontend::WindowSDL* g_window; std::unique_ptr renderer; +std::unique_ptr liverpool; namespace Libraries::GnmDriver { using namespace AmdGpu; -static std::unique_ptr liverpool; - enum GnmEventIdents : u64 { Compute0RelMem = 0x00, Compute1RelMem = 0x01, @@ -957,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { + LOG_TRACE(Lib_GnmDriver, "called"); + return Config::isNeoMode() ? 911'000'000 : 800'000'000; } int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { @@ -1707,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetVgtControl() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) || + ((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) { + return -1; + } + + const u32 reg_value = + ((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu); + PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM return ORBIS_OK; } @@ -2131,6 +2140,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() { if (!liverpool->IsGpuIdle()) { submission_lock = true; } + liverpool->SubmitDone(); send_init_packet = true; ++frames_submitted; return ORBIS_OK; diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 8100b1164..84872297e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp(); int PS4_SYSV_ABI sceGnmGetEqEventType(); int PS4_SYSV_ABI sceGnmGetEqTimeStamp(); int PS4_SYSV_ABI sceGnmGetGpuBlockStatus(); -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); int PS4_SYSV_ABI sceGnmGetGpuInfoStatus(); int PS4_SYSV_ABI sceGnmGetLastWaitedAddress(); int PS4_SYSV_ABI sceGnmGetNumTcaUnits(); @@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); -int PS4_SYSV_ABI sceGnmSetVgtControl(); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode); s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier); int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 6bd88459b..3555fddc9 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -78,9 +78,7 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) { std::scoped_lock lock{m_mutex}; for (auto& event : m_events) { - ASSERT_MSG(event.event.filter == filter, - "Event to trigger doesn't match to queue events"); - if (event.event.ident == ident) { + if ((event.event.ident == ident) && (event.event.filter == filter)) { event.Trigger(trigger_data); has_found = true; } diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 8734b9649..f83863479 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -53,6 +53,9 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { if (std::string_view{path} == "/dev/stdout") { return 2002; } + if (std::string_view{path} == "/dev/urandom") { + return 2003; + } u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); if (directory) { @@ -113,6 +116,9 @@ int PS4_SYSV_ABI sceKernelClose(int d) { if (d < 3) { // d probably hold an error code return ORBIS_KERNEL_ERROR_EPERM; } + if (d == 2003) { // dev/urandom case + return SCE_OK; + } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -223,6 +229,13 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) { } s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) { + if (d == 2003) // dev urandom case + { + auto rbuf = static_cast(buf); + for (size_t i = 0; i < nbytes; i++) + rbuf[i] = std::rand() & 0xFF; + return nbytes; + } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -459,7 +472,30 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { return file->f.WriteRaw(buf, nbytes); } +s32 PS4_SYSV_ABI sceKernelRename(const char* from, const char* to) { + auto* mnt = Common::Singleton::Instance(); + const auto src_path = mnt->GetHostPath(from); + if (!std::filesystem::exists(src_path)) { + return ORBIS_KERNEL_ERROR_ENOENT; + } + const auto dst_path = mnt->GetHostPath(to); + const bool src_is_dir = std::filesystem::is_directory(src_path); + const bool dst_is_dir = std::filesystem::is_directory(dst_path); + if (src_is_dir && !dst_is_dir) { + return ORBIS_KERNEL_ERROR_ENOTDIR; + } + if (!src_is_dir && dst_is_dir) { + return ORBIS_KERNEL_ERROR_EISDIR; + } + if (dst_is_dir && !std::filesystem::is_empty(dst_path)) { + return ORBIS_KERNEL_ERROR_ENOTEMPTY; + } + std::filesystem::copy(src_path, dst_path, std::filesystem::copy_options::overwrite_existing); + return ORBIS_OK; +} + void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { + std::srand(std::time(nullptr)); LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen); LIB_FUNCTION("wuCroIGjt2g", "libScePosix", 1, "libkernel", 1, 1, posix_open); LIB_FUNCTION("UK2Tl2DWUns", "libkernel", 1, "libkernel", 1, 1, sceKernelClose); @@ -479,6 +515,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("kBwCPsYX-m4", "libkernel", 1, "libkernel", 1, 1, sceKernelFStat); LIB_FUNCTION("mqQMh1zPPT8", "libScePosix", 1, "libkernel", 1, 1, posix_fstat); LIB_FUNCTION("VW3TVZiM4-E", "libkernel", 1, "libkernel", 1, 1, sceKernelFtruncate); + LIB_FUNCTION("52NcYU9+lEo", "libkernel", 1, "libkernel", 1, 1, sceKernelRename); LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat); LIB_FUNCTION("+r3rMFwItV4", "libkernel", 1, "libkernel", 1, 1, sceKernelPread); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index f44d928bb..9657ba04b 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -7,6 +7,7 @@ #include #include "common/assert.h" +#include "common/debug.h" #include "common/logging/log.h" #include "common/polyfill_thread.h" #include "common/singleton.h" @@ -84,6 +85,9 @@ static PS4_SYSV_ABI void stack_chk_fail() { int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len) { LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}", fmt::ptr(addr), len); + if (len == 0) { + return ORBIS_OK; + } auto* memory = Core::Memory::Instance(); memory->UnmapMemory(std::bit_cast(addr), len); return SCE_OK; @@ -356,7 +360,6 @@ int PS4_SYSV_ABI posix_connect() { } int PS4_SYSV_ABI _sigprocmask() { - LOG_DEBUG(Lib_Kernel, "STUBBED"); return ORBIS_OK; } @@ -405,6 +408,10 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("VOx8NGmHXTs", "libkernel", 1, "libkernel", 1, 1, sceKernelGetCpumode); LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", 1, 1, sceKernelUuidCreate); + LIB_FUNCTION("2SKEx6bSq-4", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap); + LIB_FUNCTION("kBJzF8x4SyE", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap2); + LIB_FUNCTION("DGMG3JshrZU", "libkernel", 1, "libkernel", 1, 1, sceKernelSetVirtualRangeName); + // equeue LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue); diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index cdee3f465..826d47973 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -3,6 +3,7 @@ #include #include "common/alignment.h" +#include "common/assert.h" #include "common/logging/log.h" #include "common/singleton.h" #include "core/libraries/error_codes.h" @@ -73,13 +74,22 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE size_t* sizeOut) { LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}", searchStart, searchEnd, alignment); + + if (searchEnd <= searchStart) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + if (searchEnd > SCE_KERNEL_MAIN_DMEM_SIZE) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + auto* memory = Core::Memory::Instance(); PAddr physAddr; - s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); + s32 result = + memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); *physAddrOut = static_cast(physAddr); - return size; + return result; } s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, @@ -211,9 +221,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) { return ORBIS_OK; } -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) { +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]) { auto* linker = Common::Singleton::Instance(); - linker->SetHeapApiFunc(func); + linker->SetHeapAPI(func); } int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, @@ -225,4 +235,77 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut directMemoryEndOut); } +s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut) { + return sceKernelBatchMap2(entries, numEntries, numEntriesOut, + MemoryFlags::SCE_KERNEL_MAP_FIXED); // 0x10, 0x410? +} + +int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len); + +s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut, int flags) { + int processed = 0; + int result = 0; + for (int i = 0; i < numEntries; i++) { + if (entries == nullptr || entries[i].length == 0 || entries[i].operation > 4) { + result = ORBIS_KERNEL_ERROR_EINVAL; + break; // break and assign a value to numEntriesOut. + } + + if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT) { + result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, + entries[i].protection, flags, + static_cast(entries[i].offset), 0, ""); + LOG_INFO( + Kernel_Vmm, + "BatchMap: entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " + "result = {}", + i, entries[i].operation, entries[i].length, entries[i].offset, (u8)entries[i].type, + result); + + if (result == 0) + processed++; + } else if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_UNMAP) { + result = sceKernelMunmap(entries[i].start, entries[i].length); + LOG_INFO(Kernel_Vmm, "BatchMap: entry = {}, operation = {}, len = {:#x}, result = {}", + i, entries[i].operation, entries[i].length, result); + + if (result == 0) + processed++; + } else if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE) { + result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length, + entries[i].protection, flags, ""); + LOG_INFO(Kernel_Vmm, + "BatchMap: entry = {}, operation = {}, len = {:#x}, type = {}, " + "result = {}", + i, entries[i].operation, entries[i].length, (u8)entries[i].type, result); + + if (result == 0) + processed++; + } else { + UNREACHABLE_MSG("called: Unimplemented Operation = {}", entries[i].operation); + } + } + if (numEntriesOut != NULL) { // can be zero. do not return an error code. + *numEntriesOut = processed; + } + return result; +} + +s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { + static constexpr size_t MaxNameSize = 32; + if (std::strlen(name) > MaxNameSize) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + + if (name == nullptr) { + LOG_ERROR(Kernel_Vmm, "name is invalid!"); + return ORBIS_KERNEL_ERROR_EFAULT; + } + auto* memory = Core::Memory::Instance(); + memory->NameVirtualRange(std::bit_cast(addr), len, name); + return ORBIS_OK; +} } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index 2a17f6ed8..378449cc5 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -6,7 +6,7 @@ #include "common/bit_field.h" #include "common/types.h" -constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 5376_MB; // ~ 6GB +constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 6_GB; // ~ 6GB namespace Libraries::Kernel { @@ -31,6 +31,14 @@ enum MemoryProtection : u32 { SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU }; +enum MemoryOpTypes : u32 { + ORBIS_KERNEL_MAP_OP_MAP_DIRECT = 0, + ORBIS_KERNEL_MAP_OP_UNMAP = 1, + ORBIS_KERNEL_MAP_OP_PROTECT = 2, + ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE = 3, + ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 +}; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -53,6 +61,16 @@ struct OrbisVirtualQueryInfo { std::array name; }; +struct OrbisKernelBatchMapEntry { + void* start; + size_t offset; + size_t length; + char protection; + char type; + short reserved; + int operation; +}; + u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize(); int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, int memoryType, s64* physAddrOut); @@ -80,9 +98,16 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, size_t infoSize); s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut); -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func); +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]); int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut); +s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut); +s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut, int flags); + +s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name); + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index e536412fd..6319b7c2f 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -394,6 +394,18 @@ int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpuma return result; } +int PS4_SYSV_ABI scePthreadGetaffinity(ScePthread thread, /*SceKernelCpumask*/ u64* mask) { + LOG_INFO(Kernel_Pthread, "called"); + + if (thread == nullptr) { + return SCE_KERNEL_ERROR_ESRCH; + } + + auto result = scePthreadAttrGetaffinity(&thread->attr, mask); + + return result; +} + ScePthreadMutex* createMutex(ScePthreadMutex* addr) { if (addr == nullptr || *addr != nullptr) { return addr; @@ -409,13 +421,21 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) { return addr; } -int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* attr, +int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* mutex_attr, const char* name) { + const ScePthreadMutexattr* attr; + if (mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } - if (attr == nullptr) { + if (mutex_attr == nullptr) { attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + if (*mutex_attr == nullptr) { + attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + attr = mutex_attr; + } } *mutex = new PthreadMutexInternal{}; @@ -427,11 +447,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr); - static auto mutex_loc = MUTEX_LOCATION("mutex"); - (*mutex)->tracy_lock = std::make_unique(&mutex_loc); - if (name != nullptr) { - (*mutex)->tracy_lock->CustomName(name, std::strlen(name)); LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result); } @@ -457,7 +473,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) { int result = pthread_mutex_destroy(&(*mutex)->pth_mutex); - LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); + LOG_DEBUG(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); delete *mutex; *mutex = nullptr; @@ -543,15 +559,11 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) { return SCE_KERNEL_ERROR_EINVAL; } - (*mutex)->tracy_lock->BeforeLock(); - int result = pthread_mutex_lock(&(*mutex)->pth_mutex); if (result != 0) { LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result); } - (*mutex)->tracy_lock->AfterLock(); - switch (result) { case 0: return SCE_OK; @@ -577,8 +589,6 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) { LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result); } - (*mutex)->tracy_lock->AfterUnlock(); - switch (result) { case 0: return SCE_OK; @@ -723,7 +733,10 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) { } int result = pthread_cond_destroy(&(*cond)->cond); - LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); + + delete *cond; + *cond = nullptr; switch (result) { case 0: @@ -785,14 +798,27 @@ int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) { int PS4_SYSV_ABI posix_pthread_cond_wait(ScePthreadCond* cond, ScePthreadMutex* mutex) { int result = scePthreadCondWait(cond, mutex); if (result < 0) { - UNREACHABLE(); + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; + } + return result; +} + +int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMutex* mutex, + u64 usec) { + int result = scePthreadCondTimedwait(cond, mutex, usec); + if (result < 0) { + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; } return result; } int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) { - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_broadcast redirect to scePthreadCondBroadcast"); int result = scePthreadCondBroadcast(cond); if (result != 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -804,7 +830,6 @@ int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) { } int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) { - // LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit"); int result = scePthreadMutexattrInit(attr); if (result < 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -816,7 +841,6 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) { } int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) { - // LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit"); int result = scePthreadMutexattrSettype(attr, type); if (result < 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -841,7 +865,6 @@ int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_ro int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) { int result = scePthreadMutexattrSetprotocol(attr, protocol); - LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result); if (result < 0) { UNREACHABLE(); } @@ -1071,6 +1094,19 @@ int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_ return SCE_KERNEL_ERROR_EINVAL; } +int PS4_SYSV_ABI scePthreadAttrSetstack(ScePthreadAttr* attr, void* addr, size_t size) { + if (attr == nullptr || *attr == nullptr || addr == nullptr || size < 0x4000) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, size); + LOG_INFO(Kernel_Pthread, "scePthreadAttrSetstack: result = {}", result); + + if (result == 0) { + return ORBIS_OK; + } + return ORBIS_KERNEL_ERROR_EINVAL; +} + int PS4_SYSV_ABI scePthreadJoin(ScePthread thread, void** res) { int result = pthread_join(thread->pth, res); LOG_INFO(Kernel_Pthread, "scePthreadJoin result = {}", result); @@ -1125,7 +1161,7 @@ int PS4_SYSV_ABI scePthreadCondWait(ScePthreadCond* cond, ScePthreadMutex* mutex } int result = pthread_cond_wait(&(*cond)->cond, &(*mutex)->pth_mutex); - LOG_INFO(Kernel_Pthread, "scePthreadCondWait, result={}", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondWait, result={}", result); switch (result) { case 0: @@ -1145,7 +1181,7 @@ int PS4_SYSV_ABI scePthreadCondattrDestroy(ScePthreadCondattr* attr) { } int result = pthread_condattr_destroy(&(*attr)->cond_attr); - LOG_INFO(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result); switch (result) { case 0: @@ -1168,8 +1204,6 @@ int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) { LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); } - (*mutex)->tracy_lock->AfterTryLock(result == 0); - switch (result) { case 0: return ORBIS_OK; @@ -1228,6 +1262,40 @@ int PS4_SYSV_ABI posix_pthread_attr_destroy(ScePthreadAttr* attr) { return result; } +int PS4_SYSV_ABI posix_pthread_attr_setschedparam(ScePthreadAttr* attr, + const SceKernelSchedParam* param) { + int result = scePthreadAttrSetschedparam(attr, param); + if (result < 0) { + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; + } + return result; +} + +int PS4_SYSV_ABI posix_pthread_attr_setinheritsched(ScePthreadAttr* attr, int inheritSched) { + int result = scePthreadAttrSetinheritsched(attr, inheritSched); + if (result < 0) { + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; + } + return result; +} + +int PS4_SYSV_ABI posix_pthread_setprio(ScePthread thread, int prio) { + int result = scePthreadSetprio(thread, prio); + if (result < 0) { + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; + } + return result; +} + int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int detachstate) { // LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit"); int result = scePthreadAttrSetdetachstate(attr, detachstate); @@ -1243,8 +1311,6 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr, PthreadEntryFunc start_routine, void* arg, const char* name) { - LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name); - int result = scePthreadCreate(thread, attr, start_routine, arg, name); if (result != 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -1291,17 +1357,11 @@ int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadC int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) { int result = scePthreadCondSignal(cond); - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_signal redirect to scePthreadCondSignal, result = {}", - result); return result; } int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) { int result = scePthreadCondDestroy(cond); - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_destroy redirect to scePthreadCondDestroy, result = {}", - result); return result; } @@ -1321,14 +1381,60 @@ int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) { return sem_wait(sem); } +int PS4_SYSV_ABI posix_sem_trywait(sem_t* sem) { + return sem_trywait(sem); +} + +#ifndef HAVE_SEM_TIMEDWAIT +int sem_timedwait(sem_t* sem, const struct timespec* abstime) { + int rc; + while ((rc = sem_trywait(sem)) == EAGAIN) { + struct timespec curr_time; + clock_gettime(CLOCK_REALTIME, &curr_time); + + s64 remaining_ns = 0; + remaining_ns += + (static_cast(abstime->tv_sec) - static_cast(curr_time.tv_sec)) * 1000000000L; + remaining_ns += static_cast(abstime->tv_nsec) - static_cast(curr_time.tv_nsec); + + if (remaining_ns <= 0) { + return ETIMEDOUT; + } + + struct timespec sleep_time; + sleep_time.tv_sec = 0; + if (remaining_ns < 5000000L) { + sleep_time.tv_nsec = remaining_ns; + } else { + sleep_time.tv_nsec = 5000000; + } + + nanosleep(&sleep_time, nullptr); + } + return rc; +} +#endif + +int PS4_SYSV_ABI posix_sem_timedwait(sem_t* sem, const timespec* t) { + return sem_timedwait(sem, t); +} + int PS4_SYSV_ABI posix_sem_post(sem_t* sem) { return sem_post(sem); } +int PS4_SYSV_ABI posix_sem_destroy(sem_t* sem) { + return sem_destroy(sem); +} + int PS4_SYSV_ABI posix_sem_getvalue(sem_t* sem, int* sval) { return sem_getvalue(sem, sval); } +int PS4_SYSV_ABI posix_pthread_attr_getstacksize(const pthread_attr_t* attr, size_t* size) { + return pthread_attr_getstacksize(attr, size); +} + int PS4_SYSV_ABI scePthreadGetschedparam(ScePthread thread, int* policy, SceKernelSchedParam* param) { return pthread_getschedparam(thread->pth, policy, param); @@ -1350,6 +1456,11 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) { UNREACHABLE(); } +[[noreturn]] void PS4_SYSV_ABI posix_pthread_exit(void* value_ptr) { + pthread_exit(value_ptr); + UNREACHABLE(); +} + int PS4_SYSV_ABI scePthreadGetthreadid() { return (int)(size_t)g_pthread_self; } @@ -1383,6 +1494,26 @@ int PS4_SYSV_ABI posix_pthread_condattr_setclock(ScePthreadCondattr* attr, clock return SCE_OK; } +int PS4_SYSV_ABI posix_pthread_getschedparam(ScePthread thread, int* policy, + SceKernelSchedParam* param) { + return scePthreadGetschedparam(thread, policy, param); +} + +int PS4_SYSV_ABI posix_pthread_setschedparam(ScePthread thread, int policy, + const SceKernelSchedParam* param) { + return scePthreadSetschedparam(thread, policy, param); +} + +int PS4_SYSV_ABI posix_pthread_attr_getschedpolicy(const ScePthreadAttr* attr, int* policy) { + return scePthreadAttrGetschedpolicy(attr, policy); +} + +int PS4_SYSV_ABI scePthreadRename(ScePthread thread, const char* name) { + thread->name = name; + LOG_INFO(Kernel_Pthread, "scePthreadRename: name = {}", thread->name); + return SCE_OK; +} + void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init); @@ -1393,6 +1524,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("WrOLvHU0yQM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setspecific); LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy); LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate); + LIB_FUNCTION("JaRMy+QcpeU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetdetachstate); LIB_FUNCTION("eXbUSpEaTsA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetinheritsched); LIB_FUNCTION("DzES9hQF4f4", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedparam); LIB_FUNCTION("nsYoNRywwNg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrInit); @@ -1401,11 +1533,15 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach); LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual); LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit); + LIB_FUNCTION("FJrT5LuUBAU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_exit); LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal); LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join); LIB_FUNCTION("EI-5-jlq2dE", "libkernel", 1, "libkernel", 1, 1, scePthreadGetthreadid); LIB_FUNCTION("1tKyG7RlMJo", "libkernel", 1, "libkernel", 1, 1, scePthreadGetprio); LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, scePthreadSetprio); + LIB_FUNCTION("GBUY7ywdULE", "libkernel", 1, "libkernel", 1, 1, scePthreadRename); + LIB_FUNCTION("F+yfmduIBB8", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstackaddr); + LIB_FUNCTION("El+cQ20DynU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetguardsize); LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); @@ -1421,11 +1557,13 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create); LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity); + LIB_FUNCTION("rcrVFJsQWRY", "libkernel", 1, "libkernel", 1, 1, scePthreadGetaffinity); LIB_FUNCTION("6UgtwV+0zb4", "libkernel", 1, "libkernel", 1, 1, scePthreadCreate); LIB_FUNCTION("T72hz6ffq08", "libkernel", 1, "libkernel", 1, 1, scePthreadYield); LIB_FUNCTION("B5GmVDKwpn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_yield); LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack); + LIB_FUNCTION("Bvn74vj6oLo", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstack); LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr); LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize); LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce); @@ -1462,6 +1600,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy); LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait); LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait); + LIB_FUNCTION("27bAgiJmOh0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_timedwait); LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init); LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1, @@ -1476,6 +1615,8 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("EjllaAqAPZo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_setclock); LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); + LIB_FUNCTION("RtLRV-pBTTY", "libScePosix", 1, "libkernel", 1, 1, + posix_pthread_attr_getschedpolicy); // openorbis weird functions LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); @@ -1485,15 +1626,27 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("E+tyo3lp5Lw", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_setdetachstate); LIB_FUNCTION("zHchY8ft5pk", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_destroy); + LIB_FUNCTION("euKRgm0Vn2M", "libScePosix", 1, "libkernel", 1, 1, + posix_pthread_attr_setschedparam); + LIB_FUNCTION("7ZlAakEf0Qg", "libScePosix", 1, "libkernel", 1, 1, + posix_pthread_attr_setinheritsched); + LIB_FUNCTION("a2P9wYGeZvc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setprio); LIB_FUNCTION("Jmi+9w9u0E4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create_name_np); LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create); LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach); LIB_FUNCTION("CBNtXOoef-E", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_max); LIB_FUNCTION("m0iS6jNsXds", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_min); + LIB_FUNCTION("FIs3-UQT9sg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getschedparam); + LIB_FUNCTION("Xs9hdiD7sAA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setschedparam); LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init); LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait); + LIB_FUNCTION("WBWzsRifCEA", "libScePosix", 1, "libkernel", 1, 1, posix_sem_trywait); + LIB_FUNCTION("w5IHyvahg-o", "libScePosix", 1, "libkernel", 1, 1, posix_sem_timedwait); LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post); + LIB_FUNCTION("cDW233RAwWo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_destroy); LIB_FUNCTION("Bq+LRV-N6Hk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_getvalue); + LIB_FUNCTION("0qOtCR-ZHck", "libScePosix", 1, "libkernel", 1, 1, + posix_pthread_attr_getstacksize); // libs RwlockSymbolsRegister(sym); SemaphoreSymbolsRegister(sym); diff --git a/src/core/libraries/kernel/thread_management.h b/src/core/libraries/kernel/thread_management.h index 8303c9ef1..c5935275f 100644 --- a/src/core/libraries/kernel/thread_management.h +++ b/src/core/libraries/kernel/thread_management.h @@ -9,7 +9,6 @@ #include #include #include -#include "common/debug.h" #include "common/types.h" namespace Core::Loader { @@ -74,7 +73,6 @@ struct PthreadMutexInternal { u8 reserved[256]; std::string name; pthread_mutex_t pth_mutex; - std::unique_ptr tracy_lock; }; struct PthreadMutexattrInternal { @@ -169,9 +167,12 @@ ScePthread PS4_SYSV_ABI scePthreadSelf(); int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, const /*SceKernelCpumask*/ u64 mask); int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask); +int PS4_SYSV_ABI scePthreadGetaffinity(ScePthread thread, /*SceKernelCpumask*/ u64* mask); int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr, PthreadEntryFunc start_routine, void* arg, const char* name); +int PS4_SYSV_ABI scePthreadSetprio(ScePthread thread, int prio); + /*** * Mutex calls */ diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index bfa6a68db..5304dc57b 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -9,7 +9,6 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" -#include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" namespace Libraries::Kernel { @@ -41,7 +40,6 @@ public: AddWaiter(waiter); // Perform the wait. - std::exchange(lk, std::unique_lock{waiter.mutex}); return waiter.Wait(lk, timeout); } @@ -59,10 +57,9 @@ public: it++; continue; } - std::scoped_lock lk2{waiter.mutex}; + it = wait_list.erase(it); token_count -= waiter.need_count; waiter.cv.notify_one(); - it = wait_list.erase(it); } return true; @@ -84,8 +81,6 @@ public: public: struct WaitingThread : public ListBaseHook { - std::mutex mutex; - std::string name; std::condition_variable cv; u32 priority; s32 need_count; @@ -93,7 +88,6 @@ public: bool was_cancled{}; explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { - name = scePthreadSelf()->name; if (is_fifo) { return; } @@ -177,10 +171,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 } s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } if (!sem->Signal(signalCount)) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -188,10 +188,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(false, needCount, nullptr); } int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Cancel(setCount, pNumWaitThreads); } diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index bc1617d36..c4854937b 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -214,6 +214,22 @@ int PS4_SYSV_ABI posix_clock_getres(u32 clock_id, OrbisKernelTimespec* res) { return SCE_KERNEL_ERROR_EINVAL; } +int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, + OrbisKernelTimezone* timezone, int* dst_seconds) { + LOG_INFO(Kernel, "called"); + if (timezone) { + sceKernelGettimezone(timezone); + param_1 -= (timezone->tz_minuteswest + timezone->tz_dsttime) * 60; + if (seconds) + *seconds = param_1; + if (dst_seconds) + *dst_seconds = timezone->tz_dsttime * 60; + } else { + return SCE_KERNEL_ERROR_EINVAL; + } + return SCE_OK; +} + void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) { clock = std::make_unique(); initial_ptc = clock->GetUptime(); @@ -239,6 +255,7 @@ void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, posix_clock_gettime); LIB_FUNCTION("lLMT9vJAck0", "libScePosix", 1, "libkernel", 1, 1, posix_clock_gettime); LIB_FUNCTION("smIj7eqzZE8", "libScePosix", 1, "libkernel", 1, 1, posix_clock_getres); + LIB_FUNCTION("0NTHN1NKONI", "libkernel", 1, "libkernel", 1, 1, sceKernelConvertLocaltimeToUtc); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index f9325297c..20efd3c0e 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -20,6 +20,7 @@ #include "core/libraries/np_trophy/np_trophy.h" #include "core/libraries/pad/pad.h" #include "core/libraries/playgo/playgo.h" +#include "core/libraries/random/random.h" #include "core/libraries/rtc/rtc.h" #include "core/libraries/save_data/savedata.h" #include "core/libraries/screenshot/screenshot.h" @@ -43,8 +44,8 @@ namespace Libraries { void InitHLELibs(Core::Loader::SymbolsResolver* sym) { LOG_INFO(Lib_Kernel, "Initializing HLE libraries"); Libraries::Kernel::LibKernel_Register(sym); - Libraries::VideoOut::RegisterLib(sym); Libraries::GnmDriver::RegisterlibSceGnmDriver(sym); + Libraries::VideoOut::RegisterLib(sym); if (!Config::isLleLibc()) { Libraries::LibC::libcSymbolsRegister(sym); } @@ -71,6 +72,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::AppContent::RegisterlibSceAppContent(sym); Libraries::PngDec::RegisterlibScePngDec(sym); Libraries::PlayGo::RegisterlibScePlayGo(sym); + Libraries::Random::RegisterlibSceRandom(sym); Libraries::Usbd::RegisterlibSceUsbd(sym); Libraries::Pad::RegisterlibScePad(sym); Libraries::Ajm::RegisterlibSceAjm(sym); diff --git a/src/core/libraries/network/net.cpp b/src/core/libraries/network/net.cpp index 1569a51ce..958f9264e 100644 --- a/src/core/libraries/network/net.cpp +++ b/src/core/libraries/network/net.cpp @@ -559,7 +559,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() { } int PS4_SYSV_ABI sceNetEpollWait() { - LOG_ERROR(Lib_Net, "(STUBBED) called"); + LOG_TRACE(Lib_Net, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/network/netctl.cpp b/src/core/libraries/network/netctl.cpp index ab1cb8ae6..a1c8e81c0 100644 --- a/src/core/libraries/network/netctl.cpp +++ b/src/core/libraries/network/netctl.cpp @@ -79,7 +79,7 @@ int PS4_SYSV_ABI sceNetCtlUnregisterCallbackV6() { } int PS4_SYSV_ABI sceNetCtlCheckCallback() { - LOG_ERROR(Lib_NetCtl, "(STUBBED) called"); + LOG_TRACE(Lib_NetCtl, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index ee4b3d6b6..c657fbf60 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later // Generated By moduleGenerator +#include "common/config.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" @@ -870,7 +871,7 @@ int PS4_SYSV_ABI sceNpAsmTerminate() { } int PS4_SYSV_ABI sceNpCheckCallback() { - LOG_ERROR(Lib_NpManager, "(STUBBED) called"); + LOG_TRACE(Lib_NpManager, "(STUBBED) called"); return ORBIS_OK; } @@ -974,8 +975,11 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { return ORBIS_OK; } -int PS4_SYSV_ABI sceNpGetNpId() { - LOG_ERROR(Lib_NpManager, "(STUBBED) called"); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId) { + LOG_ERROR(Lib_NpManager, "(DUMMY) called"); + + std::string name = Config::getUserName(); + strcpy(npId->handle.data, name.c_str()); return ORBIS_OK; } @@ -3510,4 +3514,4 @@ void RegisterlibSceNpManager(Core::Loader::SymbolsResolver* sym) { sceNpUnregisterStateCallbackForToolkit); }; -} // namespace Libraries::NpManager \ No newline at end of file +} // namespace Libraries::NpManager diff --git a/src/core/libraries/np_manager/np_manager.h b/src/core/libraries/np_manager/np_manager.h index 5b11355ad..5955a40b4 100644 --- a/src/core/libraries/np_manager/np_manager.h +++ b/src/core/libraries/np_manager/np_manager.h @@ -11,6 +11,22 @@ class SymbolsResolver; namespace Libraries::NpManager { +constexpr int ORBIS_NP_ONLINEID_MAX_LENGTH = 16; + +typedef int OrbisUserServiceUserId; + +struct OrbisNpOnlineId { + char data[ORBIS_NP_ONLINEID_MAX_LENGTH]; + char term; + char dummy[3]; +}; + +struct OrbisNpId { + OrbisNpOnlineId handle; + u8 opt[8]; + u8 reserved[8]; +}; + int PS4_SYSV_ABI Func_EF4378573542A508(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromKernel(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromPool(); @@ -204,7 +220,7 @@ int PS4_SYSV_ABI sceNpGetAccountLanguage2(); int PS4_SYSV_ABI sceNpGetAccountLanguageA(); int PS4_SYSV_ABI sceNpGetGamePresenceStatus(); int PS4_SYSV_ABI sceNpGetGamePresenceStatusA(); -int PS4_SYSV_ABI sceNpGetNpId(); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId); int PS4_SYSV_ABI sceNpGetNpReachabilityState(); int PS4_SYSV_ABI sceNpGetOnlineId(); int PS4_SYSV_ABI sceNpGetParentalControlInfo(); diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index e318e1523..c9e332d21 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later // Generated By moduleGenerator +#include #include #include "common/logging/log.h" #include "core/libraries/error_codes.h" @@ -85,7 +86,18 @@ int PS4_SYSV_ABI scePadGetCapability() { int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerInformation* pInfo) { LOG_INFO(Lib_Pad, "called handle = {}", handle); - std::memset(pInfo, 0, sizeof(OrbisPadControllerInformation)); + if (handle < 0) { + pInfo->touchPadInfo.pixelDensity = 1; + pInfo->touchPadInfo.resolution.x = 1920; + pInfo->touchPadInfo.resolution.y = 950; + pInfo->stickInfo.deadZoneLeft = 2; + pInfo->stickInfo.deadZoneRight = 2; + pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; + pInfo->connectedCount = 1; + pInfo->connected = false; + pInfo->deviceClass = ORBIS_PAD_DEVICE_CLASS_STANDARD; + return SCE_OK; + } pInfo->touchPadInfo.pixelDensity = 1; pInfo->touchPadInfo.resolution.x = 1920; pInfo->touchPadInfo.resolution.y = 950; @@ -239,7 +251,6 @@ int PS4_SYSV_ABI scePadOutputReport() { } int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num) { - std::memset(pData, 0, sizeof(OrbisPadData)); int connected_count = 0; bool connected = false; Input::State states[64]; @@ -306,11 +317,9 @@ int PS4_SYSV_ABI scePadReadHistory() { int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { auto* controller = Common::Singleton::Instance(); - int connectedCount = 0; bool isConnected = false; Input::State state; - std::memset(pData, 0, sizeof(OrbisPadData)); controller->ReadState(&state, &isConnected, &connectedCount); pData->buttons = state.buttonsState; pData->leftStick.x = state.axes[static_cast(Input::Axis::LeftX)]; @@ -322,7 +331,7 @@ int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { pData->orientation.x = 0; pData->orientation.y = 0; pData->orientation.z = 0; - pData->orientation.w = 0; + pData->orientation.w = 1; pData->acceleration.x = 0.0f; pData->acceleration.y = 0.0f; pData->acceleration.z = 0.0f; @@ -410,8 +419,14 @@ int PS4_SYSV_ABI scePadSetForceIntercepted() { } int PS4_SYSV_ABI scePadSetLightBar(s32 handle, const OrbisPadLightBarParam* pParam) { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetLightBar called handle = {} rgb = {} {} {}", handle, pParam->r, + pParam->g, pParam->b); + auto* controller = Common::Singleton::Instance(); + controller->SetLightBarRGB(pParam->r, pParam->g, pParam->b); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetLightBarBaseBrightness() { @@ -470,8 +485,14 @@ int PS4_SYSV_ABI scePadSetUserColor() { } int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetVibration called handle = {} data = {} , {}", handle, + pParam->smallMotor, pParam->largeMotor); + auto* controller = Common::Singleton::Instance(); + controller->SetVibration(pParam->smallMotor, pParam->largeMotor); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetVibrationForce() { @@ -665,4 +686,4 @@ void RegisterlibScePad(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("7xA+hFtvBCA", "libScePad", 1, "libScePad", 1, 1, Func_EF103E845B6F0420); }; -} // namespace Libraries::Pad \ No newline at end of file +} // namespace Libraries::Pad diff --git a/src/core/libraries/playgo/playgo.cpp b/src/core/libraries/playgo/playgo.cpp index 1a335a2a3..af98253f4 100644 --- a/src/core/libraries/playgo/playgo.cpp +++ b/src/core/libraries/playgo/playgo.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/logging/log.h" #include "common/singleton.h" #include "core/libraries/error_codes.h" @@ -8,8 +9,6 @@ #include "playgo.h" namespace Libraries::PlayGo { -// this lib is used to play as the game is being installed. -// can be skipped by just returning and assigning the correct values. s32 PS4_SYSV_ABI sceDbgPlayGoRequestNextChunk() { LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); @@ -52,9 +51,17 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) { LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, *chunkIds, numberOfEntries); - // assign all now so that scePlayGoGetLocus is not called again for every single entry - std::fill(outLoci, outLoci + numberOfEntries, - OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST); + + auto* playgo = Common::Singleton::Instance(); + + for (uint32_t i = 0; i < numberOfEntries; i++) { + if (chunkIds[i] <= playgo->GetPlaygoHeader().mchunk_count) { + outLoci[i] = OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST; + } else { + outLoci[i] = ORBIS_PLAYGO_LOCUS_NOT_DOWNLOADED; + return ORBIS_PLAYGO_ERROR_BAD_CHUNK_ID; + } + } return ORBIS_OK; } @@ -64,13 +71,14 @@ s32 PS4_SYSV_ABI scePlayGoGetProgress(OrbisPlayGoHandle handle, const OrbisPlayG handle, *chunkIds, numberOfEntries); outProgress->progressSize = 0x10000; // todo? outProgress->totalSize = 0x10000; - return 0; + return ORBIS_OK; } s32 PS4_SYSV_ABI scePlayGoGetToDoList(OrbisPlayGoHandle handle, OrbisPlayGoToDo* outTodoList, u32 numberOfEntries, u32* outEntries) { - LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); - if (handle != shadMagic) + LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {} numberOfEntries = {}", handle, + numberOfEntries); + if (handle != 1) return ORBIS_PLAYGO_ERROR_BAD_HANDLE; if (outTodoList == nullptr) return ORBIS_PLAYGO_ERROR_BAD_POINTER; @@ -88,7 +96,7 @@ s32 PS4_SYSV_ABI scePlayGoInitialize(OrbisPlayGoInitParams* param) { } s32 PS4_SYSV_ABI scePlayGoOpen(OrbisPlayGoHandle* outHandle, const void* param) { - *outHandle = shadMagic; + *outHandle = 1; LOG_INFO(Lib_PlayGo, "(STUBBED)called"); return ORBIS_OK; } @@ -141,4 +149,4 @@ void RegisterlibScePlayGo(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("MPe0EeBGM-E", "libScePlayGo", 1, "libScePlayGo", 1, 0, scePlayGoTerminate); }; -} // namespace Libraries::PlayGo \ No newline at end of file +} // namespace Libraries::PlayGo diff --git a/src/core/libraries/random/random.cpp b/src/core/libraries/random/random.cpp new file mode 100644 index 000000000..8147c5183 --- /dev/null +++ b/src/core/libraries/random/random.cpp @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" +#include "random.h" + +namespace Libraries::Random { + +s32 PS4_SYSV_ABI sceRandomGetRandomNumber(u8* buf, size_t size) { + LOG_TRACE(Lib_Random, "called"); + if (size > SCE_RANDOM_MAX_SIZE) { + return SCE_RANDOM_ERROR_INVALID; + } + + for (auto i = 0; i < size; ++i) { + buf[i] = std::rand() & 0xFF; + } + return ORBIS_OK; +} + +void RegisterlibSceRandom(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("PI7jIZj4pcE", "libSceRandom", 1, "libSceRandom", 1, 1, sceRandomGetRandomNumber); +}; + +} // namespace Libraries::Random diff --git a/src/core/libraries/random/random.h b/src/core/libraries/random/random.h new file mode 100644 index 000000000..b5f87f877 --- /dev/null +++ b/src/core/libraries/random/random.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Random { +constexpr int32_t SCE_RANDOM_MAX_SIZE = 64; + +s32 PS4_SYSV_ABI sceRandomGetRandomNumber(u8* buf, size_t size); + +void RegisterlibSceRandom(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::Random \ No newline at end of file diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index db6d0964d..20496d76d 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -15,7 +15,7 @@ #include "error_codes.h" namespace Libraries::SaveData { - +bool is_rw_mode = false; static constexpr std::string_view g_mount_point = "/savedata0"; // temp mount point (todo) std::string game_serial; @@ -180,27 +180,31 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* OrbisSaveDataDirNameSearchResult* result) { if (cond == nullptr) return ORBIS_SAVE_DATA_ERROR_PARAMETER; - LOG_ERROR(Lib_SaveData, "TODO sceSaveDataDirNameSearch: Add params"); + LOG_INFO(Lib_SaveData, "called"); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / std::to_string(cond->userId) / game_serial; if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) { - if (cond->dirName == nullptr) { // look for all dirs if no dir is provided. + if (cond->dirName == nullptr || std::string_view(cond->dirName->data) + .empty()) { // look for all dirs if no dir is provided. for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) { - if (std::filesystem::is_directory(entry.path())) { - i++; - result->dirNamesNum = 0; // why is it 1024? is it max? + if (std::filesystem::is_directory(entry.path()) && + entry.path().filename().string() != "sdmemory") { + // sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs. // copy dir name to be used by sceSaveDataMount in read mode. strncpy(result->dirNames[i].data, entry.path().filename().string().c_str(), 32); - result->hitNum = i + 1; - result->dirNamesNum = i + 1; // to confirm - result->setNum = i + 1; // to confirm + i++; + result->hitNum = i; + result->dirNamesNum = i; + result->setNum = i; } } } else { // Need a game to test. + LOG_ERROR(Lib_SaveData, "Check Me. sceSaveDataDirNameSearch: dirName = {}", + cond->dirName->data); strncpy(result->dirNames[0].data, cond->dirName->data, 32); result->hitNum = 1; - result->dirNamesNum = 1; // to confirm - result->setNum = 1; // to confirm + result->dirNamesNum = 1; + result->setNum = 1; } } else { result->hitNum = 0; @@ -303,8 +307,51 @@ int PS4_SYSV_ABI sceSaveDataGetMountInfo(const OrbisSaveDataMountPoint* mountPoi return ORBIS_OK; } -int PS4_SYSV_ABI sceSaveDataGetParam() { - LOG_ERROR(Lib_SaveData, "(STUBBED) called"); +int PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint, + const OrbisSaveDataParamType paramType, void* paramBuf, + const size_t paramBufSize, size_t* gotSize) { + + if (mountPoint == nullptr) + return ORBIS_SAVE_DATA_ERROR_PARAMETER; + + auto* mnt = Common::Singleton::Instance(); + const auto mount_dir = mnt->GetHostPath(mountPoint->data); + Common::FS::IOFile file(mount_dir / "param.txt", Common::FS::FileAccessMode::Read); + OrbisSaveDataParam params; + file.Read(params); + + LOG_INFO(Lib_SaveData, "called"); + + switch (paramType) { + case ORBIS_SAVE_DATA_PARAM_TYPE_ALL: { + memcpy(paramBuf, ¶ms, sizeof(OrbisSaveDataParam)); + *gotSize = sizeof(OrbisSaveDataParam); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_TITLE: { + std::memcpy(paramBuf, ¶ms.title, ORBIS_SAVE_DATA_TITLE_MAXSIZE); + *gotSize = ORBIS_SAVE_DATA_TITLE_MAXSIZE; + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_SUB_TITLE: { + std::memcpy(paramBuf, ¶ms.subTitle, ORBIS_SAVE_DATA_SUBTITLE_MAXSIZE); + *gotSize = ORBIS_SAVE_DATA_SUBTITLE_MAXSIZE; + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_DETAIL: { + std::memcpy(paramBuf, ¶ms.detail, ORBIS_SAVE_DATA_DETAIL_MAXSIZE); + *gotSize = ORBIS_SAVE_DATA_DETAIL_MAXSIZE; + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_USER_PARAM: { + std::memcpy(paramBuf, ¶ms.userParam, sizeof(u32)); + *gotSize = sizeof(u32); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_MTIME: { + std::memcpy(paramBuf, ¶ms.mtime, sizeof(time_t)); + *gotSize = sizeof(time_t); + } break; + default: { + UNREACHABLE_MSG("Unknown Param = {}", paramType); + } break; + } + return ORBIS_OK; } @@ -321,7 +368,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataCount() { int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const size_t bufSize, const int64_t offset) { const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial / "save_mem1.sav"; + std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav"; Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Read); if (!file.IsOpen()) { @@ -336,7 +383,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam) { const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(getParam->userId) / game_serial; + std::to_string(getParam->userId) / game_serial / "sdmemory"; if (getParam == nullptr) return ORBIS_SAVE_DATA_ERROR_PARAMETER; if (getParam->data != nullptr) { @@ -443,6 +490,7 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR: case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: { + is_rw_mode = (mount_mode == ORBIS_SAVE_DATA_MOUNT_MODE_RDWR) ? true : false; if (!std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; } @@ -460,10 +508,6 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF | ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON: { if (std::filesystem::exists(mount_dir)) { - g_mount_point.copy(mount_result->mount_point.data, 16); - mnt->Mount(mount_dir, mount_result->mount_point.data); - mount_result->required_blocks = 0; - mount_result->mount_status = 0; return ORBIS_SAVE_DATA_ERROR_EXISTS; } if (std::filesystem::create_directories(mount_dir)) { @@ -483,7 +527,7 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, mount_result->mount_status = 1; } break; default: - UNREACHABLE(); + UNREACHABLE_MSG("Unknown mount mode = {}", mount_mode); } mount_result->required_blocks = 0; @@ -583,15 +627,46 @@ int PS4_SYSV_ABI sceSaveDataSetEventInfo() { int PS4_SYSV_ABI sceSaveDataSetParam(const OrbisSaveDataMountPoint* mountPoint, OrbisSaveDataParamType paramType, const void* paramBuf, size_t paramBufSize) { - auto* mnt = Common::Singleton::Instance(); - const auto mount_dir = mnt->GetHostPath(mountPoint->data); - LOG_INFO(Lib_SaveData, "called = {}, mountPoint->data = {}", mount_dir.string(), - mountPoint->data); + if (paramBuf == nullptr) + return ORBIS_SAVE_DATA_ERROR_PARAMETER; - if (paramBuf != nullptr) { - Common::FS::IOFile file(mount_dir / "param.txt", Common::FS::FileAccessMode::Write); - file.WriteRaw(paramBuf, paramBufSize); + auto* mnt = Common::Singleton::Instance(); + const auto mount_dir = mnt->GetHostPath(mountPoint->data) / "param.txt"; + OrbisSaveDataParam params; + if (std::filesystem::exists(mount_dir)) { + Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Read); + file.ReadRaw(¶ms, sizeof(OrbisSaveDataParam)); } + + LOG_INFO(Lib_SaveData, "called"); + + switch (paramType) { + case ORBIS_SAVE_DATA_PARAM_TYPE_ALL: { + memcpy(¶ms, paramBuf, sizeof(OrbisSaveDataParam)); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_TITLE: { + strncpy(params.title, static_cast(paramBuf), paramBufSize); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_SUB_TITLE: { + strncpy(params.subTitle, static_cast(paramBuf), paramBufSize); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_DETAIL: { + strncpy(params.detail, static_cast(paramBuf), paramBufSize); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_USER_PARAM: { + params.userParam = *(static_cast(paramBuf)); + } break; + case ORBIS_SAVE_DATA_PARAM_TYPE_MTIME: { + params.mtime = *(static_cast(paramBuf)); + } break; + default: { + UNREACHABLE_MSG("Unknown Param = {}", paramType); + } + } + + Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Write); + file.WriteRaw(¶ms, sizeof(OrbisSaveDataParam)); + return ORBIS_OK; } @@ -604,11 +679,11 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf, const size_t bufSize, const int64_t offset) { LOG_INFO(Lib_SaveData, "called"); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial / "save_mem1.sav"; + std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav"; Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Write); file.Seek(offset); - file.WriteRaw((void*)buf, bufSize); + file.WriteRaw(buf, bufSize); return ORBIS_OK; } @@ -616,13 +691,13 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf, int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam) { LOG_INFO(Lib_SaveData, "called: dataNum = {}, slotId= {}", setParam->dataNum, setParam->slotId); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(setParam->userId) / game_serial; + std::to_string(setParam->userId) / game_serial / "sdmemory"; if (setParam->data != nullptr) { Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Write); if (!file.IsOpen()) return -1; file.Seek(setParam->data->offset); - file.WriteRaw((void*)setParam->data->buf, setParam->data->bufSize); + file.WriteRaw(setParam->data->buf, setParam->data->bufSize); } if (setParam->param != nullptr) { @@ -632,7 +707,7 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* se if (setParam->icon != nullptr) { Common::FS::IOFile file(mount_dir / "save_icon.png", Common::FS::FileAccessMode::Write); - file.WriteRaw((void*)setParam->icon->buf, setParam->icon->bufSize); + file.WriteRaw(setParam->icon->buf, setParam->icon->bufSize); } return ORBIS_OK; @@ -644,7 +719,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(u32 userId, size_t memorySize, LOG_INFO(Lib_SaveData, "called:userId = {}, memorySize = {}", userId, memorySize); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial; + std::to_string(userId) / game_serial / "sdmemory"; if (std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_EXISTS; @@ -663,7 +738,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2 LOG_INFO(Lib_SaveData, "called"); // if (setupParam->option == 1) { // check this later. const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(setupParam->userId) / game_serial; + std::to_string(setupParam->userId) / game_serial / "sdmemory"; if (std::filesystem::exists(mount_dir) && std::filesystem::exists(mount_dir / "save_mem2.sav")) { Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Read); @@ -717,16 +792,17 @@ int PS4_SYSV_ABI sceSaveDataTransferringMount() { } s32 PS4_SYSV_ABI sceSaveDataUmount(const OrbisSaveDataMountPoint* mountPoint) { - if (std::string(mountPoint->data).empty()) { + LOG_INFO(Lib_SaveData, "mountPoint = {}", mountPoint->data); + if (std::string_view(mountPoint->data).empty()) { return ORBIS_SAVE_DATA_ERROR_NOT_MOUNTED; } const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / std::to_string(1) / game_serial / mountPoint->data; auto* mnt = Common::Singleton::Instance(); - + const auto& guest_path = mnt->GetHostPath(mountPoint->data); + if (guest_path.empty()) + return ORBIS_SAVE_DATA_ERROR_NOT_MOUNTED; mnt->Unmount(mount_dir, mountPoint->data); - LOG_INFO(Lib_SaveData, "mountPoint = {}", std::string(mountPoint->data)); - return ORBIS_OK; } @@ -736,23 +812,33 @@ int PS4_SYSV_ABI sceSaveDataUmountSys() { } int PS4_SYSV_ABI sceSaveDataUmountWithBackup(const OrbisSaveDataMountPoint* mountPoint) { - LOG_ERROR(Lib_SaveData, "called = {}", std::string(mountPoint->data)); + LOG_INFO(Lib_SaveData, "called mount = {}, is_rw_mode = {}", std::string(mountPoint->data), + is_rw_mode); auto* mnt = Common::Singleton::Instance(); const auto mount_dir = mnt->GetHostPath(mountPoint->data); if (!std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; } + // leave disabled for now. and just unmount. - std::filesystem::create_directories(mount_dir.parent_path() / "backup"); + /* if (is_rw_mode) { // backup is done only when mount mode is ReadWrite. + auto backup_path = mount_dir; + std::string save_data_dir = (mount_dir.string() + "_backup"); + backup_path.replace_filename(save_data_dir); - for (const auto& entry : std::filesystem::recursive_directory_iterator(mount_dir)) { - const auto& path = entry.path(); - const auto target_path = mount_dir.parent_path() / "backup"; - if (std::filesystem::is_regular_file(path)) { - std::filesystem::copy(path, target_path, - std::filesystem::copy_options::overwrite_existing); + std::filesystem::create_directories(backup_path); + + for (const auto& entry : std::filesystem::recursive_directory_iterator(mount_dir)) { + const auto& path = entry.path(); + if (std::filesystem::is_regular_file(path)) { + std::filesystem::copy(path, save_data_dir, + std::filesystem::copy_options::overwrite_existing); + } } - } + }*/ + const auto& guest_path = mnt->GetHostPath(mountPoint->data); + if (guest_path.empty()) + return ORBIS_SAVE_DATA_ERROR_NOT_MOUNTED; mnt->Unmount(mount_dir, mountPoint->data); return ORBIS_OK; diff --git a/src/core/libraries/save_data/savedata.h b/src/core/libraries/save_data/savedata.h index f342d0ddb..9b3cf900f 100644 --- a/src/core/libraries/save_data/savedata.h +++ b/src/core/libraries/save_data/savedata.h @@ -242,6 +242,13 @@ struct OrbisSaveDataMemorySync { u8 reserved[28]; }; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_ALL = 0; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_TITLE = 1; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_SUB_TITLE = 2; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_DETAIL = 3; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_USER_PARAM = 4; +constexpr int ORBIS_SAVE_DATA_PARAM_TYPE_MTIME = 5; + int PS4_SYSV_ABI sceSaveDataAbort(); int PS4_SYSV_ABI sceSaveDataBackup(); int PS4_SYSV_ABI sceSaveDataBindPsnAccount(); @@ -291,7 +298,9 @@ int PS4_SYSV_ABI sceSaveDataGetFormat(); int PS4_SYSV_ABI sceSaveDataGetMountedSaveDataCount(); int PS4_SYSV_ABI sceSaveDataGetMountInfo(const OrbisSaveDataMountPoint* mountPoint, OrbisSaveDataMountInfo* info); -int PS4_SYSV_ABI sceSaveDataGetParam(); +int PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint, + const OrbisSaveDataParamType paramType, void* paramBuf, + const size_t paramBufSize, size_t* gotSize); int PS4_SYSV_ABI sceSaveDataGetProgress(); int PS4_SYSV_ABI sceSaveDataGetSaveDataCount(); int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const size_t bufSize, diff --git a/src/core/libraries/system/systemservice.cpp b/src/core/libraries/system/systemservice.cpp index b704ccd93..d99ec7c7c 100644 --- a/src/core/libraries/system/systemservice.cpp +++ b/src/core/libraries/system/systemservice.cpp @@ -1898,7 +1898,7 @@ s32 PS4_SYSV_ABI sceSystemServiceParamGetInt(int param_id, int* value) { } switch (param_id) { case ORBIS_SYSTEM_SERVICE_PARAM_ID_LANG: - *value = ORBIS_SYSTEM_PARAM_LANG_ENGLISH_US; + *value = Config::GetLanguage(); break; case ORBIS_SYSTEM_SERVICE_PARAM_ID_DATE_FORMAT: *value = ORBIS_SYSTEM_PARAM_DATE_FORMAT_DDMMYYYY; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index e74fb10f2..25de48a4d 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -3,14 +3,17 @@ #include #include "common/assert.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/thread.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" #include "core/platform.h" - #include "video_core/renderer_vulkan/renderer_vulkan.h" extern std::unique_ptr renderer; +extern std::unique_ptr liverpool; namespace Libraries::VideoOut { @@ -41,20 +44,18 @@ VideoOutDriver::VideoOutDriver(u32 width, u32 height) { main_port.resolution.fullHeight = height; main_port.resolution.paneWidth = width; main_port.resolution.paneHeight = height; + present_thread = std::jthread([&](std::stop_token token) { PresentThread(token); }); } VideoOutDriver::~VideoOutDriver() = default; int VideoOutDriver::Open(const ServiceThreadParams* params) { - std::scoped_lock lock{mutex}; - if (main_port.is_open) { return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY; } - - int handle = 1; main_port.is_open = true; - return handle; + liverpool->SetVoPort(&main_port); + return 1; } void VideoOutDriver::Close(s32 handle) { @@ -158,41 +159,37 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) { return ORBIS_OK; } -void VideoOutDriver::Flip(std::chrono::microseconds timeout) { - Request req; - { - std::unique_lock lock{mutex}; - submit_cond.wait_for(lock, timeout, [&] { return !requests.empty(); }); - if (requests.empty()) { - renderer->ShowSplash(); - return; - } - - // Retrieve the request. - req = requests.front(); - requests.pop(); +std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { + if (!req) { + return std::chrono::microseconds{0}; } + const auto start = std::chrono::high_resolution_clock::now(); + // Whatever the game is rendering show splash if it is active if (!renderer->ShowSplash(req.frame)) { // Present the frame. renderer->Present(req.frame); } - std::scoped_lock lock{mutex}; - // Update flip status. - auto& flip_status = req.port->flip_status; - flip_status.count++; - flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.flipArg = req.flip_arg; - flip_status.currentBuffer = req.index; - flip_status.flipPendingNum = static_cast(requests.size()); + auto* port = req.port; + { + std::unique_lock lock{port->port_mutex}; + auto& flip_status = port->flip_status; + flip_status.count++; + flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + flip_status.flipArg = req.flip_arg; + flip_status.currentBuffer = req.index; + if (req.eop) { + --flip_status.gcQueueNum; + } + --flip_status.flipPendingNum; + } // Trigger flip events for the port. - for (auto& event : req.port->flip_events) { + for (auto& event : port->flip_events) { if (event != nullptr) { event->TriggerEvent(SCE_VIDEO_OUT_EVENT_FLIP, Kernel::SceKernelEvent::Filter::VideoOut, reinterpret_cast(req.flip_arg)); @@ -201,57 +198,109 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { // Reset flip label if (req.index != -1) { - req.port->buffer_labels[req.index] = 0; + port->buffer_labels[req.index] = 0; + port->SignalVoLabel(); } + + const auto end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - start); } bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { - std::scoped_lock lock{mutex}; + { + std::unique_lock lock{port->port_mutex}; + if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); + return false; + } + if (is_eop) { + ++port->flip_status.gcQueueNum; + } + ++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter + port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); + } + + if (!is_eop) { + // Before processing the flip we need to ask GPU thread to flush command list as at this + // point VO surface is ready to be presented, and we will need have an actual state of + // Vulkan image at the time of frame presentation. + liverpool->SendCommand([=, this]() { + renderer->FlushDraw(); + SubmitFlipInternal(port, index, flip_arg, is_eop); + }); + } else { + SubmitFlipInternal(port, index, flip_arg, is_eop); + } + + return true; +} + +void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { Vulkan::Frame* frame; if (index == -1) { - frame = renderer->PrepareBlankFrame(); + frame = renderer->PrepareBlankFrame(is_eop); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; - frame = renderer->PrepareFrame(group, buffer.address_left); - } - - if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) { - LOG_ERROR(Lib_VideoOut, "Flip queue is full"); - return false; + frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); } + std::scoped_lock lock{mutex}; requests.push({ .frame = frame, .port = port, .index = index, .flip_arg = flip_arg, - .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), .eop = is_eop, }); - - port->flip_status.flipPendingNum = static_cast(requests.size()); - port->flip_status.gcQueueNum = 0; - submit_cond.notify_one(); - - return true; } -void VideoOutDriver::Vblank() { - std::scoped_lock lock{mutex}; +void VideoOutDriver::PresentThread(std::stop_token token) { + static constexpr std::chrono::milliseconds VblankPeriod{16}; + Common::SetCurrentThreadName("PresentThread"); - auto& vblank_status = main_port.vblank_status; - vblank_status.count++; - vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + const auto receive_request = [this] -> Request { + std::scoped_lock lk{mutex}; + if (!requests.empty()) { + const auto request = requests.front(); + requests.pop(); + return request; + } + return {}; + }; - // Trigger flip events for the port. - for (auto& event : main_port.vblank_events) { - if (event != nullptr) { - event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK, - Kernel::SceKernelEvent::Filter::VideoOut, nullptr); + auto vblank_period = VblankPeriod / Config::vblankDiv(); + auto delay = std::chrono::microseconds{0}; + while (!token.stop_requested()) { + // Sleep for most of the vblank duration. + std::this_thread::sleep_for(vblank_period - delay); + + // Check if it's time to take a request. + auto& vblank_status = main_port.vblank_status; + if (vblank_status.count % (main_port.flip_rate + 1) == 0) { + const auto request = receive_request(); + delay = Flip(request); + FRAME_END; + } + + { + // Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus` + std::unique_lock lock{main_port.vo_mutex}; + vblank_status.count++; + vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + main_port.vblank_cv.notify_all(); + } + + // Trigger flip events for the port. + for (auto& event : main_port.vblank_events) { + if (event != nullptr) { + event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK, + Kernel::SceKernelEvent::Filter::VideoOut, nullptr); + } } } } diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index d98e62eea..bee800602 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -3,10 +3,13 @@ #pragma once +#include "common/debug.h" +#include "common/polyfill_thread.h" +#include "core/libraries/videoout/video_out.h" + #include #include #include -#include "core/libraries/videoout/video_out.h" namespace Vulkan { struct Frame; @@ -25,6 +28,10 @@ struct VideoOutPort { SceVideoOutVblankStatus vblank_status; std::vector flip_events; std::vector vblank_events; + std::mutex vo_mutex; + std::mutex port_mutex; + std::condition_variable vo_cv; + std::condition_variable vblank_cv; int flip_rate = 0; s32 FindFreeGroup() const { @@ -35,6 +42,22 @@ struct VideoOutPort { return index; } + bool IsVoLabel(const u64* address) const { + const u64* start = &buffer_labels[0]; + const u64* end = &buffer_labels[MaxDisplayBuffers - 1]; + return address >= start && address <= end; + } + + void WaitVoLabel(auto&& pred) { + std::unique_lock lk{vo_mutex}; + vo_cv.wait(lk, pred); + } + + void SignalVoLabel() { + std::scoped_lock lk{vo_mutex}; + vo_cv.notify_one(); + } + [[nodiscard]] int NumRegisteredBuffers() const { return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(), [](auto& buffer) { return buffer.group_index != -1; }); @@ -63,27 +86,29 @@ public: const BufferAttribute* attribute); int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); - void Flip(std::chrono::microseconds timeout); bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); - void Vblank(); - private: struct Request { Vulkan::Frame* frame; VideoOutPort* port; s32 index; s64 flip_arg; - u64 submit_tsc; bool eop; + + operator bool() const noexcept { + return frame != nullptr; + } }; + std::chrono::microseconds Flip(const Request& req); + void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); + void PresentThread(std::stop_token token); + std::mutex mutex; VideoOutPort main_port{}; - std::condition_variable_any submit_cond; - std::condition_variable done_cond; + std::jthread present_thread; std::queue requests; - bool is_neo{}; }; } // namespace Libraries::VideoOut diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 8fbd69c4d..ab9eac76f 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) { s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) { LOG_INFO(Lib_VideoOut, "called"); - s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum; + auto* port = driver->GetPort(handle); + std::unique_lock lock{port->port_mutex}; + s32 pending = port->flip_status.flipPendingNum; return pending; } @@ -161,6 +163,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->port_mutex}; *status = port->flip_status; LOG_INFO(Lib_VideoOut, @@ -183,6 +186,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus* return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->vo_mutex}; *status = port->vblank_status; return ORBIS_OK; } @@ -196,7 +200,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutio s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index, const void* param) { LOG_INFO(Lib_VideoOut, "called"); - ASSERT(userId == UserService::ORBIS_USER_SERVICE_USER_ID_SYSTEM || userId == 0); ASSERT(busType == SCE_VIDEO_OUT_BUS_TYPE_MAIN); if (index != 0) { @@ -229,14 +232,6 @@ s32 PS4_SYSV_ABI sceVideoOutUnregisterBuffers(s32 handle, s32 attributeIndex) { return driver->UnregisterBuffers(port, attributeIndex); } -void Flip(std::chrono::microseconds micros) { - return driver->Flip(micros); -} - -void Vblank() { - return driver->Vblank(); -} - void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { auto* port = driver->GetPort(handle); ASSERT(port); @@ -266,6 +261,18 @@ s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo( return ORBIS_OK; } +s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle) { + auto* port = driver->GetPort(handle); + if (!port) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; + } + + std::unique_lock lock{port->vo_mutex}; + const auto prev_counter = port->vblank_status.count; + port->vblank_cv.wait(lock, [&]() { return prev_counter != port->vblank_status.count; }); + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); @@ -294,6 +301,7 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { sceVideoOutGetVblankStatus); LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetDeviceCapabilityInfo); + LIB_FUNCTION("j6RaAUlaLv0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutWaitVblank); // openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1 LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 52426eccb..b4423efdf 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -92,11 +92,12 @@ void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, Pixe u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); -s32 PS4_SYSV_ABI sceVideoOutAddVBlankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); +s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses, s32 bufferNum, const BufferAttribute* attribute); s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate); s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle); +s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle); s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode, s64 flipArg); s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status); s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status); @@ -104,9 +105,6 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i const void* param); s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); -void Flip(std::chrono::microseconds micros); -void Vblank(); - // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 2e47d17d6..d4a15825b 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -305,7 +305,8 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { // Module was just loaded by above code. Allocate TLS block for it. Module* module = m_modules[module_index - 1].get(); const u32 init_image_size = module->tls.init_image_size; - u8* dest = reinterpret_cast(heap_api_func(module->tls.image_size)); + // TODO: Determine if Windows will crash from this + u8* dest = reinterpret_cast(heap_api->heap_malloc(module->tls.image_size)); const u8* src = reinterpret_cast(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -320,21 +321,38 @@ void Linker::InitTlsForThread(bool is_primary) { static constexpr size_t TlsAllocAlign = 0x20; const size_t total_tls_size = Common::AlignUp(static_tls_size, TlsAllocAlign) + TcbSize; + // If sceKernelMapNamedFlexibleMemory is being called from libkernel and addr = 0 + // it automatically places mappings in system reserved area instead of managed. + static constexpr VAddr KernelAllocBase = 0x880000000ULL; + // The kernel module has a few different paths for TLS allocation. // For SDK < 1.7 it allocates both main and secondary thread blocks using libc mspace/malloc. // In games compiled with newer SDK, the main thread gets mapped from flexible memory, // with addr = 0, so system managed area. Here we will only implement the latter. - void* addr_out{}; + void* addr_out{reinterpret_cast(KernelAllocBase)}; if (is_primary) { const size_t tls_aligned = Common::AlignUp(total_tls_size, 16_KB); const int ret = Libraries::Kernel::sceKernelMapNamedFlexibleMemory( &addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls"); ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread"); } else { - if (heap_api_func) { - addr_out = heap_api_func(total_tls_size); + if (heap_api) { +#ifndef WIN32 + addr_out = heap_api->heap_malloc(total_tls_size); } else { addr_out = std::malloc(total_tls_size); +#else + // TODO: Windows tls malloc replacement, refer to rtld_tls_block_malloc + LOG_ERROR(Core_Linker, "TLS user malloc called, using std::malloc"); + addr_out = std::malloc(total_tls_size); + if (!addr_out) { + auto pth_id = pthread_self(); + auto handle = pthread_gethandle(pth_id); + ASSERT_MSG(addr_out, + "Cannot allocate TLS block defined for handle=%x, index=%d size=%d", + handle, pth_id, total_tls_size); + } +#endif } } diff --git a/src/core/linker.h b/src/core/linker.h index aee8c8fd3..ed1fe400c 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -46,7 +46,21 @@ struct EntryParams { const char* argv[3]; }; -using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t); +struct HeapAPI { + PS4_SYSV_ABI void* (*heap_malloc)(size_t); + PS4_SYSV_ABI void (*heap_free)(void*); + PS4_SYSV_ABI void* (*heap_calloc)(size_t, size_t); + PS4_SYSV_ABI void* (*heap_realloc)(void*, size_t); + PS4_SYSV_ABI void* (*heap_memalign)(size_t, size_t); + PS4_SYSV_ABI int (*heap_posix_memalign)(void**, size_t, size_t); + // NOTE: Fields below may be inaccurate + PS4_SYSV_ABI int (*heap_reallocalign)(void); + PS4_SYSV_ABI void (*heap_malloc_stats)(void); + PS4_SYSV_ABI int (*heap_malloc_stats_fast)(void); + PS4_SYSV_ABI size_t (*heap_malloc_usable_size)(void*); +}; + +using AppHeapAPI = HeapAPI*; class Linker { public: @@ -75,8 +89,8 @@ public: } } - void SetHeapApiFunc(void* func) { - heap_api_func = *reinterpret_cast(func); + void SetHeapAPI(void* func[]) { + heap_api = reinterpret_cast(func); } void AdvanceGenerationCounter() noexcept { @@ -104,7 +118,7 @@ private: size_t static_tls_size{}; u32 max_tls_index{}; u32 num_static_modules{}; - HeapApiFunc heap_api_func{}; + AppHeapAPI heap_api{}; std::vector> m_modules; Loader::SymbolsResolver m_hle_symbols{}; }; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9326ccaad..6d0d581f9 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -4,11 +4,10 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/debug.h" -#include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/memory_management.h" #include "core/memory.h" -#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" namespace Core { @@ -55,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; // Add the allocated region to the list and commit its pages. - auto& area = AddDmemAllocation(free_addr, size); + auto& area = CarveDmemArea(free_addr, size)->second; area.memory_type = memory_type; area.is_free = false; return free_addr; @@ -64,9 +63,8 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; - const auto dmem_area = FindDmemArea(phys_addr); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.base == phys_addr && - dmem_area->second.size == size); + auto dmem_area = CarveDmemArea(phys_addr, size); + ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. std::vector> remove_list; @@ -75,10 +73,11 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { continue; } if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) { - LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr, - mapping.size); + auto vma_segment_start_addr = phys_addr - mapping.phys_base + addr; + LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", + vma_segment_start_addr, size); // Unmaping might erase from vma_map. We can't do it here. - remove_list.emplace_back(addr, mapping.size); + remove_list.emplace_back(vma_segment_start_addr, size); } } for (const auto& [addr, size] : remove_list) { @@ -100,29 +99,30 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem alignment = alignment > 0 ? alignment : 16_KB; VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; + // Fixed mapping means the virtual address must exactly match the provided one. + if (True(flags & MemoryMapFlags::Fixed)) { + const auto& vma = FindVMA(mapped_addr)->second; + // If the VMA is mapped, unmap the region first. + if (vma.IsMapped()) { + UnmapMemory(mapped_addr, size); + } + const size_t remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + } + // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(mapped_addr); - // If the VMA is free and contains the requested mapping we are done. - if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { - mapped_addr = virtual_addr; - } else { - // Search for the first free VMA that fits our mapping. - while (it->second.type != VMAType::Free || it->second.size < size) { - it++; - } - ASSERT(it != vma_map.end()); - const auto& vma = it->second; - mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; - } + mapped_addr = SearchFree(mapped_addr, size, alignment); } // Add virtual memory area - auto& new_vma = AddMapping(mapped_addr, size); + const auto new_vma_handle = CarveVMA(mapped_addr, size); + auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; new_vma.name = ""; new_vma.type = VMAType::Reserved; + MergeAdjacent(vma_map, new_vma_handle); *out_addr = std::bit_cast(mapped_addr); return ORBIS_OK; @@ -132,6 +132,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M MemoryMapFlags flags, VMAType type, std::string_view name, bool is_exec, PAddr phys_addr, u64 alignment) { std::scoped_lock lk{mutex}; + + // Certain games perform flexible mappings on loop to determine + // the available flexible memory size. Questionable but we need to handle this. if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) { return SCE_KERNEL_ERROR_ENOMEM; } @@ -140,91 +143,64 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // flag so we will take the branch that searches for free (or reserved) mappings. virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; alignment = alignment > 0 ? alignment : 16_KB; - VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; - SCOPE_EXIT { - auto& new_vma = AddMapping(mapped_addr, size); - new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); - new_vma.prot = prot; - new_vma.name = name; - new_vma.type = type; - - if (type == VMAType::Direct) { - new_vma.phys_base = phys_addr; - MapVulkanMemory(mapped_addr, size); - } - if (type == VMAType::Flexible) { - flexible_usage += size; - } - }; // Fixed mapping means the virtual address must exactly match the provided one. - if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) { + if (True(flags & MemoryMapFlags::Fixed)) { // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(mapped_addr); - // If the VMA is free and contains the requested mapping we are done. - if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { - mapped_addr = virtual_addr; - } else { - // Search for the first free VMA that fits our mapping. - while (it->second.type != VMAType::Free || it->second.size < size) { - it++; - } - ASSERT(it != vma_map.end()); - const auto& vma = it->second; - mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; - } + mapped_addr = SearchFree(mapped_addr, size, alignment); } // Perform the mapping. *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec); TRACK_ALLOC(*out_addr, size, "VMEM"); + + auto& new_vma = CarveVMA(mapped_addr, size)->second; + new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); + new_vma.prot = prot; + new_vma.name = name; + new_vma.type = type; + new_vma.is_exec = is_exec; + + if (type == VMAType::Direct) { + new_vma.phys_base = phys_addr; + rasterizer->MapMemory(mapped_addr, size); + } + if (type == VMAType::Flexible) { + flexible_usage += size; + } + return ORBIS_OK; } int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, MemoryMapFlags flags, uintptr_t fd, size_t offset) { - if (virtual_addr == 0) { - virtual_addr = impl.SystemManagedVirtualBase(); - } else { - LOG_INFO(Kernel_Vmm, "Virtual addr {:#x} with size {:#x}", virtual_addr, size); - } - - VAddr mapped_addr = 0; + VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; const size_t size_aligned = Common::AlignUp(size, 16_KB); // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(virtual_addr); - while (it->second.type != VMAType::Free || it->second.size < size_aligned) { - it++; - } - ASSERT(it != vma_map.end()); - - mapped_addr = it->second.base; + mapped_addr = SearchFree(mapped_addr, size_aligned, 1); } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) && - remaining_size >= size); - - mapped_addr = virtual_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); } // Map the file. impl.MapFile(mapped_addr, size, offset, std::bit_cast(prot), fd); // Add virtual memory area - auto& new_vma = AddMapping(mapped_addr, size_aligned); + auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = prot; new_vma.name = "File"; @@ -238,29 +214,38 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; - // TODO: Partial unmaps are technically supported by the guest. - const auto it = vma_map.find(virtual_addr); - ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, - "Attempting to unmap partially mapped range"); + const auto it = FindVMA(virtual_addr); + const auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr, size), + "Existing mapping does not contain requested unmap range"); - const auto type = it->second.type; + const auto vma_base_addr = vma_base.base; + const auto vma_base_size = vma_base.size; + const auto phys_base = vma_base.phys_base; + const bool is_exec = vma_base.is_exec; + const auto start_in_vma = virtual_addr - vma_base_addr; + const auto type = vma_base.type; const bool has_backing = type == VMAType::Direct || type == VMAType::File; if (type == VMAType::Direct) { - UnmapVulkanMemory(virtual_addr, size); + rasterizer->UnmapMemory(virtual_addr, size); } if (type == VMAType::Flexible) { flexible_usage -= size; } // Mark region as free and attempt to coalesce it with neighbours. - auto& vma = it->second; + const auto new_it = CarveVMA(virtual_addr, size); + auto& vma = new_it->second; vma.type = VMAType::Free; vma.prot = MemoryProt::NoAccess; vma.phys_base = 0; - MergeAdjacent(vma_map, it); + vma.disallow_merge = false; + vma.name = ""; + MergeAdjacent(vma_map, new_it); // Unmap the memory region. - impl.Unmap(virtual_addr, size, has_backing); + impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec, + has_backing); TRACK_FREE(virtual_addr, "VMEM"); } @@ -284,7 +269,7 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr } int MemoryManager::VirtualQuery(VAddr addr, int flags, - Libraries::Kernel::OrbisVirtualQueryInfo* info) { + ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { std::scoped_lock lk{mutex}; auto it = FindVMA(addr); @@ -302,6 +287,7 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->is_flexible.Assign(vma.type == VMAType::Flexible); info->is_direct.Assign(vma.type == VMAType::Direct); info->is_commited.Assign(vma.type != VMAType::Free); + vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); ASSERT(dmem_it != dmem_map.end()); @@ -313,7 +299,7 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, } int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, - Libraries::Kernel::OrbisQueryInfo* out_info) { + ::Libraries::Kernel::OrbisQueryInfo* out_info) { std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); @@ -353,21 +339,53 @@ int MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, si return ORBIS_OK; } -std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { - auto it = mapped_memories.upper_bound(addr); - it = std::prev(it); - ASSERT(it != mapped_memories.end() && it->first <= addr); - return std::make_pair(*it->second.buffer, addr - it->first); +void MemoryManager::NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name) { + auto it = FindVMA(virtual_addr); + + ASSERT_MSG(it->second.Contains(virtual_addr, size), + "Range provided is not fully containted in vma"); + it->second.name = name; +} +VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) { + // If the requested address is below the mapped range, start search from the lowest address + auto min_search_address = impl.SystemManagedVirtualBase(); + if (virtual_addr < min_search_address) { + virtual_addr = min_search_address; + } + + auto it = FindVMA(virtual_addr); + ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!"); + + // If the VMA is free and contains the requested mapping we are done. + if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { + return virtual_addr; + } + // Search for the first free VMA that fits our mapping. + const auto is_suitable = [&] { + if (!it->second.IsFree()) { + return false; + } + const auto& vma = it->second; + virtual_addr = Common::AlignUp(vma.base, alignment); + // Sometimes the alignment itself might be larger than the VMA. + if (virtual_addr > vma.base + vma.size) { + return false; + } + const size_t remaining_size = vma.base + vma.size - virtual_addr; + return remaining_size >= size; + }; + while (!is_suitable()) { + it++; + } + return virtual_addr; } -VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { +MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; - ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) && - vma.base <= virtual_addr, - "Adding a mapping to already mapped region"); + ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); const VAddr start_in_vma = virtual_addr - vma.base; const VAddr end_in_vma = start_in_vma + size; @@ -382,16 +400,15 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { vma_handle = Split(vma_handle, start_in_vma); } - return vma_handle->second; + return vma_handle; } -DirectMemoryArea& MemoryManager::AddDmemAllocation(PAddr addr, size_t size) { +MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; - ASSERT_MSG(area.is_free && area.base <= addr, - "Adding an allocation to already allocated region"); + ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); const PAddr start_in_area = addr - area.base; const PAddr end_in_vma = start_in_area + size; @@ -406,7 +423,7 @@ DirectMemoryArea& MemoryManager::AddDmemAllocation(PAddr addr, size_t size) { dmem_handle = Split(dmem_handle, start_in_area); } - return dmem_handle->second; + return dmem_handle; } MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) { @@ -436,85 +453,6 @@ MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, size_t of return dmem_map.emplace_hint(std::next(dmem_handle), new_area.base, new_area); }; -void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { - return; - const vk::Device device = instance->GetDevice(); - const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); - void* host_pointer = reinterpret_cast(addr); - const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT( - vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer); - ASSERT(host_mem_props.memoryTypeBits != 0); - - int mapped_memory_type = -1; - auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) { - u32 host_mem_types = host_mem_props.memoryTypeBits; - while (host_mem_types != 0) { - // Try to find a cached memory type - mapped_memory_type = std::countr_zero(host_mem_types); - host_mem_types -= (1 << mapped_memory_type); - - if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) { - return; - } - } - - mapped_memory_type = -1; - }; - - // First try to find a memory that is both coherent and cached - find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent | - vk::MemoryPropertyFlagBits::eHostCached); - if (mapped_memory_type == -1) - // Then only coherent (lower performance) - find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent); - - if (mapped_memory_type == -1) { - LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping"); - mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits); - } - - const vk::StructureChain alloc_info = { - vk::MemoryAllocateInfo{ - .allocationSize = size, - .memoryTypeIndex = static_cast(mapped_memory_type), - }, - vk::ImportMemoryHostPointerInfoEXT{ - .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, - .pHostPointer = host_pointer, - }, - }; - - const auto [it, new_memory] = mapped_memories.try_emplace(addr); - ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory"); - - auto& memory = it->second; - memory.backing = device.allocateMemoryUnique(alloc_info.get()); - - constexpr vk::BufferUsageFlags MapFlags = - vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; - - const vk::StructureChain buffer_info = { - vk::BufferCreateInfo{ - .size = size, - .usage = MapFlags, - .sharingMode = vk::SharingMode::eExclusive, - }, - vk::ExternalMemoryBufferCreateInfoKHR{ - .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, - }}; - memory.buffer = device.createBufferUnique(buffer_info.get()); - device.bindBufferMemory(*memory.buffer, *memory.backing, 0); -} - -void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { - return; - const auto it = mapped_memories.find(addr); - ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); - mapped_memories.erase(it); -} - int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut) { std::scoped_lock lk{mutex}; diff --git a/src/core/memory.h b/src/core/memory.h index 93aef2d8c..d58269678 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -3,20 +3,17 @@ #pragma once -#include +#include #include #include -#include -#include #include "common/enum.h" #include "common/singleton.h" #include "common/types.h" #include "core/address_space.h" #include "core/libraries/kernel/memory_management.h" -#include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan { -class Instance; +class Rasterizer; } namespace Libraries::Kernel { @@ -87,9 +84,18 @@ struct VirtualMemoryArea { bool disallow_merge = false; std::string name = ""; uintptr_t fd = 0; + bool is_exec = false; bool Contains(VAddr addr, size_t size) const { - return addr >= base && (addr + size) < (base + this->size); + return addr >= base && (addr + size) <= (base + this->size); + } + + bool IsFree() const noexcept { + return type == VMAType::Free; + } + + bool IsMapped() const noexcept { + return type != VMAType::Free && type != VMAType::Reserved; } bool CanMergeWith(const VirtualMemoryArea& next) const { @@ -120,8 +126,8 @@ public: explicit MemoryManager(); ~MemoryManager(); - void SetInstance(const Vulkan::Instance* instance_) { - instance = instance_; + void SetRasterizer(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; } void SetTotalFlexibleSize(u64 size) { @@ -132,9 +138,7 @@ public: return total_flexible_size - flexible_usage; } - /// Returns the offset of the mapped virtual system managed memory base from where it usually - /// would be mapped. - [[nodiscard]] VAddr SystemReservedVirtualBase() noexcept { + VAddr SystemReservedVirtualBase() noexcept { return impl.SystemReservedVirtualBase(); } @@ -164,11 +168,11 @@ public: int DirectQueryAvailable(PAddr search_start, PAddr search_end, size_t alignment, PAddr* phys_addr_out, size_t* size_out); - std::pair GetVulkanBuffer(VAddr addr); - int GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut); + void NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name); + private: VMAHandle FindVMA(VAddr target) { return std::prev(vma_map.upper_bound(target)); @@ -198,18 +202,16 @@ private: return iter; } - VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size); + VAddr SearchFree(VAddr virtual_addr, size_t size, u32 alignment = 0); - DirectMemoryArea& AddDmemAllocation(PAddr addr, size_t size); + VMAHandle CarveVMA(VAddr virtual_addr, size_t size); + + DMemHandle CarveDmemArea(PAddr addr, size_t size); VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma); DMemHandle Split(DMemHandle dmem_handle, size_t offset_in_area); - void MapVulkanMemory(VAddr addr, size_t size); - - void UnmapVulkanMemory(VAddr addr, size_t size); - private: AddressSpace impl; DMemMap dmem_map; @@ -217,14 +219,7 @@ private: std::recursive_mutex mutex; size_t total_flexible_size = 448_MB; size_t flexible_usage{}; - - struct MappedMemory { - vk::UniqueBuffer buffer; - vk::UniqueDeviceMemory backing; - size_t buffer_size; - }; - std::map mapped_memories; - const Vulkan::Instance* instance{}; + Vulkan::Rasterizer* rasterizer{}; }; using Memory = Common::Singleton; diff --git a/src/core/module.cpp b/src/core/module.cpp index d885b917f..775e1ef19 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -88,6 +88,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true); LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); + LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); // Initialize trampoline generator. void* trampoline_addr = std::bit_cast(base_virtual_addr + aligned_base_size); diff --git a/src/emulator.cpp b/src/emulator.cpp index 5e584eee9..4990b4aab 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,29 +1,33 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include -#include -#include -#include -#include -#include -#include #include + #include "common/config.h" #include "common/debug.h" #include "common/logging/backend.h" +#include "common/logging/log.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" +#include "common/scm_rev.h" #include "common/singleton.h" #include "common/version.h" +#include "core/file_format/playgo_chunk.h" +#include "core/file_format/psf.h" +#include "core/file_format/splash.h" #include "core/file_sys/fs.h" +#include "core/libraries/disc_map/disc_map.h" #include "core/libraries/kernel/thread_management.h" +#include "core/libraries/libc/libc.h" +#include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libs.h" +#include "core/libraries/rtc/rtc.h" +#include "core/libraries/videoout/video_out.h" #include "core/linker.h" #include "core/memory.h" #include "emulator.h" +#include "video_core/renderdoc.h" Frontend::WindowSDL* g_window = nullptr; @@ -46,14 +50,17 @@ Emulator::Emulator() { Common::Log::Initialize(); Common::Log::Start(); LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::VERSION); + LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); + LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); + LOG_INFO(Loader, "Description {}", Common::g_scm_desc); // Defer until after logging is initialized. memory = Core::Memory::Instance(); controller = Common::Singleton::Instance(); linker = Common::Singleton::Instance(); - window = std::make_unique(WindowWidth, WindowHeight, controller); - g_window = window.get(); + // Load renderdoc module. + VideoCore::LoadRenderDoc(); } Emulator::~Emulator() { @@ -68,6 +75,8 @@ void Emulator::Run(const std::filesystem::path& file) { // Loading param.sfo file if exists std::string id; + std::string title; + std::string app_version; std::filesystem::path sce_sys_folder = file.parent_path() / "sce_sys"; if (std::filesystem::is_directory(sce_sys_folder)) { for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) { @@ -75,11 +84,14 @@ void Emulator::Run(const std::filesystem::path& file) { auto* param_sfo = Common::Singleton::Instance(); param_sfo->open(sce_sys_folder.string() + "/param.sfo", {}); id = std::string(param_sfo->GetString("CONTENT_ID"), 7, 9); - std::string title(param_sfo->GetString("TITLE")); + title = param_sfo->GetString("TITLE"); LOG_INFO(Loader, "Game id: {} Title: {}", id, title); u32 fw_version = param_sfo->GetInteger("SYSTEM_VER"); - std::string app_version = param_sfo->GetString("APP_VER"); + app_version = param_sfo->GetString("APP_VER"); LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); + } else if (entry.path().filename() == "playgo-chunk.dat") { + auto* playgo = Common::Singleton::Instance(); + playgo->Open(sce_sys_folder.string() + "/playgo-chunk.dat"); } else if (entry.path().filename() == "pic0.png" || entry.path().filename() == "pic1.png") { auto* splash = Common::Singleton::Instance(); @@ -93,6 +105,19 @@ void Emulator::Run(const std::filesystem::path& file) { } } + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); + std::string window_title = ""; + if (Common::isRelease) { + window_title = fmt::format("shadPS4 v{} | {}", Common::VERSION, game_title); + } else { + window_title = + fmt::format("shadPS4 v{} {} | {}", Common::VERSION, Common::g_scm_desc, game_title); + } + window = + std::make_unique(WindowWidth, WindowHeight, controller, window_title); + + g_window = window.get(); + const auto& mount_data_dir = Common::FS::GetUserPath(Common::FS::PathType::GameDataDir) / id; if (!std::filesystem::exists(mount_data_dir)) { std::filesystem::create_directory(mount_data_dir); @@ -103,6 +128,20 @@ void Emulator::Run(const std::filesystem::path& file) { std::filesystem::create_directory(mount_temp_dir); } mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir + mnt->Mount(mount_temp_dir, "/temp"); + + const auto& mount_download_dir = + Common::FS::GetUserPath(Common::FS::PathType::DownloadDir) / id; + if (!std::filesystem::exists(mount_download_dir)) { + std::filesystem::create_directory(mount_download_dir); + } + mnt->Mount(mount_download_dir, "/download0"); + + const auto& mount_captures_dir = Common::FS::GetUserPath(Common::FS::PathType::CapturesDir); + if (!std::filesystem::exists(mount_captures_dir)) { + std::filesystem::create_directory(mount_captures_dir); + } + VideoCore::SetOutputDir(mount_captures_dir.generic_string(), id); // Initialize kernel and library facilities. Libraries::Kernel::init_pthreads(); @@ -136,29 +175,24 @@ void Emulator::Run(const std::filesystem::path& file) { std::jthread mainthread = std::jthread([this](std::stop_token stop_token) { linker->Execute(); }); - // Begin main window loop until the application exits - static constexpr std::chrono::milliseconds FlipPeriod{16}; - while (window->isOpen()) { window->waitEvent(); - Libraries::VideoOut::Flip(FlipPeriod); - Libraries::VideoOut::Vblank(); - FRAME_END; } std::exit(0); } void Emulator::LoadSystemModules(const std::filesystem::path& file) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", nullptr}, {"libSceFiber.sprx", nullptr}, {"libSceUlt.sprx", nullptr}, + {"libSceJson.sprx", nullptr}, + {"libSceJson2.sprx", nullptr}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, - {"libSceJpegEnc.sprx", nullptr}, - {"libSceJson2.sprx", nullptr}}, + {"libSceJpegEnc.sprx", nullptr}}, }; std::vector found_modules; diff --git a/src/emulator.h b/src/emulator.h index 323170e38..01bce7e7c 100644 --- a/src/emulator.h +++ b/src/emulator.h @@ -6,7 +6,7 @@ #include #include -#include +#include "common/singleton.h" #include "core/linker.h" #include "input/controller.h" #include "sdl_window.h" diff --git a/src/images/shadPS4.icns b/src/images/shadPS4.icns new file mode 100644 index 000000000..0e26368db Binary files /dev/null and b/src/images/shadPS4.icns differ diff --git a/src/images/shadps4.ico b/src/images/shadps4.ico index 4f71b2341..bb50f9995 100644 Binary files a/src/images/shadps4.ico and b/src/images/shadps4.ico differ diff --git a/src/input/controller.cpp b/src/input/controller.cpp index 7bfecadc8..4a3db1633 100644 --- a/src/input/controller.cpp +++ b/src/input/controller.cpp @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "core/libraries/kernel/time_management.h" #include "core/libraries/pad/pad.h" #include "input/controller.h" + namespace Input { GameController::GameController() { @@ -116,4 +118,29 @@ void GameController::Axis(int id, Input::Axis axis, int value) { AddState(state); } +void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) { + if (m_sdl_gamepad != nullptr) { + SDL_SetGamepadLED(m_sdl_gamepad, r, g, b); + } +} + +bool GameController::SetVibration(u8 smallMotor, u8 largeMotor) { + if (m_sdl_gamepad != nullptr) { + return SDL_RumbleGamepad(m_sdl_gamepad, (smallMotor / 255.0f) * 0xFFFF, + (largeMotor / 255.0f) * 0xFFFF, -1) == 0; + } + return true; +} + +void GameController::TryOpenSDLController() { + if (m_sdl_gamepad == nullptr || !SDL_GamepadConnected(m_sdl_gamepad)) { + int gamepad_count; + SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); + m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr; + SDL_free(gamepads); + } + + SetLightBarRGB(0, 0, 255); +} + } // namespace Input diff --git a/src/input/controller.h b/src/input/controller.h index a16f7dd06..ef0991568 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -6,6 +6,8 @@ #include #include "common/types.h" +struct SDL_Gamepad; + namespace Input { enum class Axis { @@ -43,6 +45,9 @@ public: void CheckButton(int id, u32 button, bool isPressed); void AddState(const State& state); void Axis(int id, Input::Axis axis, int value); + void SetLightBarRGB(u8 r, u8 g, u8 b); + bool SetVibration(u8 smallMotor, u8 largeMotor); + void TryOpenSDLController(); private: struct StateInternal { @@ -57,6 +62,8 @@ private: u32 m_first_state = 0; std::array m_states; std::array m_private; + + SDL_Gamepad* m_sdl_gamepad = nullptr; }; } // namespace Input diff --git a/src/main.cpp b/src/main.cpp index c7210ac57..9df14f138 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include +#include "emulator.h" int main(int argc, char* argv[]) { if (argc == 1) { diff --git a/src/qt_gui/elf_viewer.cpp b/src/qt_gui/elf_viewer.cpp index 1674e1ab8..72861d15f 100644 --- a/src/qt_gui/elf_viewer.cpp +++ b/src/qt_gui/elf_viewer.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "elf_viewer.h" + ElfViewer::ElfViewer(QWidget* parent) : QTableWidget(parent) { dir_list_std = Config::getElfViewer(); for (const auto& str : dir_list_std) { diff --git a/src/qt_gui/elf_viewer.h b/src/qt_gui/elf_viewer.h index 15aeb55fd..a3b85223d 100644 --- a/src/qt_gui/elf_viewer.h +++ b/src/qt_gui/elf_viewer.h @@ -14,8 +14,9 @@ #include #include #include + +#include "core/loader/elf.h" #include "game_list_frame.h" -#include "src/core/loader/elf.h" class ElfViewer : public QTableWidget { Q_OBJECT diff --git a/src/qt_gui/game_grid_frame.h b/src/qt_gui/game_grid_frame.h index 19ac531bc..ce775315e 100644 --- a/src/qt_gui/game_grid_frame.h +++ b/src/qt_gui/game_grid_frame.h @@ -14,6 +14,7 @@ #include #include #include + #include "common/config.h" #include "game_info.h" #include "game_list_utils.h" diff --git a/src/qt_gui/game_info.cpp b/src/qt_gui/game_info.cpp index 39cdeb75a..0a472eaef 100644 --- a/src/qt_gui/game_info.cpp +++ b/src/qt_gui/game_info.cpp @@ -5,6 +5,7 @@ #include #include #include + #include "game_info.h" GameInfoClass::GameInfoClass() = default; @@ -42,4 +43,4 @@ void GameInfoClass::GetGameInfo(QWidget* parent) { &QProgressDialog::setValue); dialog.exec(); -} \ No newline at end of file +} diff --git a/src/qt_gui/game_info.h b/src/qt_gui/game_info.h index c137a5a60..b2b102e00 100644 --- a/src/qt_gui/game_info.h +++ b/src/qt_gui/game_info.h @@ -7,6 +7,7 @@ #include #include #include + #include "common/config.h" #include "core/file_format/psf.h" #include "game_list_utils.h" diff --git a/src/qt_gui/game_install_dialog.cpp b/src/qt_gui/game_install_dialog.cpp index ab4fc2734..4b2b8528b 100644 --- a/src/qt_gui/game_install_dialog.cpp +++ b/src/qt_gui/game_install_dialog.cpp @@ -1,8 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "game_install_dialog.h" - #include #include #include @@ -14,6 +12,8 @@ #include #include +#include "game_install_dialog.h" + GameInstallDialog::GameInstallDialog() : m_gamesDirectory(nullptr) { auto layout = new QVBoxLayout(this); @@ -21,8 +21,8 @@ GameInstallDialog::GameInstallDialog() : m_gamesDirectory(nullptr) { layout->addStretch(); layout->addWidget(SetupDialogActions()); - setWindowTitle("Shadps4 - Choose directory"); - setWindowIcon(QIcon(":/images/shadps4.ico")); + setWindowTitle("shadPS4 - Choose directory"); + setWindowIcon(QIcon(":images/shadps4.ico")); } GameInstallDialog::~GameInstallDialog() {} @@ -47,7 +47,7 @@ QWidget* GameInstallDialog::SetupGamesDirectory() { layout->addWidget(m_gamesDirectory); // Browse button. - auto browse = new QPushButton("..."); + auto browse = new QPushButton("Browse"); connect(browse, &QPushButton::clicked, this, &GameInstallDialog::Browse); diff --git a/src/qt_gui/game_install_dialog.h b/src/qt_gui/game_install_dialog.h index abd447d93..6f439e81d 100644 --- a/src/qt_gui/game_install_dialog.h +++ b/src/qt_gui/game_install_dialog.h @@ -4,6 +4,7 @@ #pragma once #include + #include "common/config.h" #include "common/path_util.h" diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 4353964fe..2699c9615 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -190,17 +190,17 @@ void GameListFrame::SetRegionFlag(int row, int column, QString itemStr) { QTableWidgetItem* item = new QTableWidgetItem(); QImage scaledPixmap; if (itemStr == "Japan") { - scaledPixmap = QImage(":/images/flag_jp.png"); + scaledPixmap = QImage(":images/flag_jp.png"); } else if (itemStr == "Europe") { - scaledPixmap = QImage(":/images/flag_eu.png"); + scaledPixmap = QImage(":images/flag_eu.png"); } else if (itemStr == "USA") { - scaledPixmap = QImage(":/images/flag_us.png"); + scaledPixmap = QImage(":images/flag_us.png"); } else if (itemStr == "Asia") { - scaledPixmap = QImage(":/images/flag_china.png"); + scaledPixmap = QImage(":images/flag_china.png"); } else if (itemStr == "World") { - scaledPixmap = QImage(":/images/flag_world.png"); + scaledPixmap = QImage(":images/flag_world.png"); } else { - scaledPixmap = QImage(":/images/flag_unk.png"); + scaledPixmap = QImage(":images/flag_unk.png"); } QWidget* widget = new QWidget(this); QVBoxLayout* layout = new QVBoxLayout(widget); diff --git a/src/qt_gui/game_list_frame.h b/src/qt_gui/game_list_frame.h index e9f75afd4..d8bccf466 100644 --- a/src/qt_gui/game_list_frame.h +++ b/src/qt_gui/game_list_frame.h @@ -16,6 +16,7 @@ #include #include #include + #include "game_info.h" #include "game_list_utils.h" #include "gui_context_menus.h" diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 9f8953607..146d5c34d 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -165,12 +165,12 @@ public: if (createShortcutLinux(linkPath, ebootPath, iconPath)) { #endif QMessageBox::information( - nullptr, "Shortcut Creation", - QString("Shortcut created successfully:\n %1").arg(linkPath)); + nullptr, "Shortcut creation", + QString("Shortcut created successfully!\n %1").arg(linkPath)); } else { QMessageBox::critical( nullptr, "Error", - QString("Error creating shortcut:\n %1").arg(linkPath)); + QString("Error creating shortcut!\n %1").arg(linkPath)); } } else { QMessageBox::critical(nullptr, "Error", "Failed to convert icon."); @@ -183,11 +183,11 @@ public: if (createShortcutLinux(linkPath, ebootPath, iconPath)) { #endif QMessageBox::information( - nullptr, "Shortcut Creation", - QString("Shortcut created successfully:\n %1").arg(linkPath)); + nullptr, "Shortcut creation", + QString("Shortcut created successfully!\n %1").arg(linkPath)); } else { QMessageBox::critical(nullptr, "Error", - QString("Error creating shortcut:\n %1").arg(linkPath)); + QString("Error creating shortcut!\n %1").arg(linkPath)); } } } @@ -308,7 +308,7 @@ private: QFile shortcutFile(linkPath); if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) { QMessageBox::critical(nullptr, "Error", - QString("Error creating shortcut:\n %1").arg(linkPath)); + QString("Error creating shortcut!\n %1").arg(linkPath)); return false; } diff --git a/src/qt_gui/main.cpp b/src/qt_gui/main.cpp index ea3f27f82..cff01cc2b 100644 --- a/src/qt_gui/main.cpp +++ b/src/qt_gui/main.cpp @@ -2,15 +2,14 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/config.h" #include "core/file_sys/fs.h" +#include "emulator.h" #include "qt_gui/game_install_dialog.h" #include "qt_gui/main_window.h" -#include -#include - // Custom message handler to ignore Qt logs void customMessageHandler(QtMsgType, const QMessageLogContext&, const QString&) {} @@ -22,8 +21,11 @@ int main(int argc, char* argv[]) { Config::load(user_dir / "config.toml"); std::filesystem::create_directory(user_dir / "game_data"); + // Check if elf or eboot.bin path was passed as a command line argument + bool has_command_line_argument = argc > 1; + // Check if the game install directory is set - if (Config::getGameInstallDir() == "") { + if (Config::getGameInstallDir() == "" && !has_command_line_argument) { GameInstallDialog dlg; dlg.exec(); } @@ -36,7 +38,7 @@ int main(int argc, char* argv[]) { m_main_window->Init(); // Check for command line arguments - if (argc > 1) { + if (has_command_line_argument) { Core::Emulator emulator; emulator.Run(argv[1]); } diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 3f325ff4a..aec2e7a5d 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -8,12 +8,15 @@ #include #include #include + #include "common/io_file.h" #include "common/version.h" #include "core/file_format/pkg.h" #include "core/loader.h" #include "game_install_dialog.h" #include "main_window.h" +#include "settings_dialog.h" +#include "video_core/renderer_vulkan/vk_instance.h" MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); @@ -37,6 +40,7 @@ bool MainWindow::Init() { CreateConnects(); SetLastUsedTheme(); SetLastIconSizeBullet(); + GetPhysicalDevices(); // show ui setMinimumSize(350, minimumSizeHint().height()); setWindowTitle(QString::fromStdString("shadPS4 v" + std::string(Common::VERSION))); @@ -50,8 +54,8 @@ bool MainWindow::Init() { this->setStatusBar(statusBar.data()); // Update status bar int numGames = m_game_info->m_games.size(); - QString statusMessage = "Games: " + QString::number(numGames) + " (" + - QString::number(duration.count()) + "ms). Ready."; + QString statusMessage = + "Games: " + QString::number(numGames) + " (" + QString::number(duration.count()) + "ms)"; statusBar->showMessage(statusMessage); return true; } @@ -71,8 +75,8 @@ void MainWindow::CreateActions() { // create action group for themes m_theme_act_group = new QActionGroup(this); - m_theme_act_group->addAction(ui->setThemeLight); m_theme_act_group->addAction(ui->setThemeDark); + m_theme_act_group->addAction(ui->setThemeLight); m_theme_act_group->addAction(ui->setThemeGreen); m_theme_act_group->addAction(ui->setThemeBlue); m_theme_act_group->addAction(ui->setThemeViolet); @@ -156,6 +160,19 @@ void MainWindow::LoadGameLists() { } } +void MainWindow::GetPhysicalDevices() { + Vulkan::Instance instance(false, false); + auto physical_devices = instance.GetPhysicalDevices(); + for (const vk::PhysicalDevice physical_device : physical_devices) { + auto prop = physical_device.getProperties(); + QString name = QString::fromUtf8(prop.deviceName, -1); + if (prop.apiVersion < Vulkan::TargetVulkanApiVersion) { + name += " * Unsupported Vulkan Version"; + } + m_physical_devices.push_back(name); + } +} + void MainWindow::CreateConnects() { connect(this, &MainWindow::WindowResized, this, &MainWindow::HandleResize); connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); @@ -178,31 +195,15 @@ void MainWindow::CreateConnects() { } }); - connect(ui->playButton, &QPushButton::clicked, this, [this]() { - QString gamePath = ""; - int table_mode = Config::getTableMode(); - if (table_mode == 0) { - if (m_game_list_frame->currentItem()) { - int itemID = m_game_list_frame->currentItem()->row(); - gamePath = QString::fromStdString(m_game_info->m_games[itemID].path + "/eboot.bin"); - } - } else if (table_mode == 1) { - if (m_game_grid_frame->cellClicked) { - int itemID = (m_game_grid_frame->crtRow * m_game_grid_frame->columnCnt) + - m_game_grid_frame->crtColumn; - gamePath = QString::fromStdString(m_game_info->m_games[itemID].path + "/eboot.bin"); - } - } else { - if (m_elf_viewer->currentItem()) { - int itemID = m_elf_viewer->currentItem()->row(); - gamePath = QString::fromStdString(m_elf_viewer->m_elf_list[itemID].toStdString()); - } - } - if (gamePath != "") { - AddRecentFiles(gamePath); - Core::Emulator emulator; - emulator.Run(gamePath.toUtf8().constData()); - } + connect(ui->playButton, &QPushButton::clicked, this, &MainWindow::StartGame); + connect(m_game_grid_frame.get(), &QTableWidget::cellDoubleClicked, this, + &MainWindow::StartGame); + connect(m_game_list_frame.get(), &QTableWidget::cellDoubleClicked, this, + &MainWindow::StartGame); + + connect(ui->settingsButton, &QPushButton::clicked, this, [this]() { + auto settingsDialog = new SettingsDialog(m_physical_devices, this); + settingsDialog->exec(); }); connect(ui->setIconSizeTinyAct, &QAction::triggered, this, [this]() { @@ -343,14 +344,6 @@ void MainWindow::CreateConnects() { }); // Themes - connect(ui->setThemeLight, &QAction::triggered, &m_window_themes, [this]() { - m_window_themes.SetWindowTheme(Theme::Light, ui->mw_searchbar); - Config::setMainWindowTheme(static_cast(Theme::Light)); - if (!isIconBlack) { - SetUiIcons(true); - isIconBlack = true; - } - }); connect(ui->setThemeDark, &QAction::triggered, &m_window_themes, [this]() { m_window_themes.SetWindowTheme(Theme::Dark, ui->mw_searchbar); Config::setMainWindowTheme(static_cast(Theme::Dark)); @@ -359,6 +352,14 @@ void MainWindow::CreateConnects() { isIconBlack = false; } }); + connect(ui->setThemeLight, &QAction::triggered, &m_window_themes, [this]() { + m_window_themes.SetWindowTheme(Theme::Light, ui->mw_searchbar); + Config::setMainWindowTheme(static_cast(Theme::Light)); + if (!isIconBlack) { + SetUiIcons(true); + isIconBlack = true; + } + }); connect(ui->setThemeGreen, &QAction::triggered, &m_window_themes, [this]() { m_window_themes.SetWindowTheme(Theme::Green, ui->mw_searchbar); Config::setMainWindowTheme(static_cast(Theme::Green)); @@ -385,6 +386,33 @@ void MainWindow::CreateConnects() { }); } +void MainWindow::StartGame() { + QString gamePath = ""; + int table_mode = Config::getTableMode(); + if (table_mode == 0) { + if (m_game_list_frame->currentItem()) { + int itemID = m_game_list_frame->currentItem()->row(); + gamePath = QString::fromStdString(m_game_info->m_games[itemID].path + "/eboot.bin"); + } + } else if (table_mode == 1) { + if (m_game_grid_frame->cellClicked) { + int itemID = (m_game_grid_frame->crtRow * m_game_grid_frame->columnCnt) + + m_game_grid_frame->crtColumn; + gamePath = QString::fromStdString(m_game_info->m_games[itemID].path + "/eboot.bin"); + } + } else { + if (m_elf_viewer->currentItem()) { + int itemID = m_elf_viewer->currentItem()->row(); + gamePath = QString::fromStdString(m_elf_viewer->m_elf_list[itemID].toStdString()); + } + } + if (gamePath != "") { + AddRecentFiles(gamePath); + Core::Emulator emulator; + emulator.Run(gamePath.toUtf8().constData()); + } +} + void MainWindow::SearchGameTable(const QString& text) { if (isTableList) { for (int row = 0; row < m_game_list_frame->rowCount(); row++) { @@ -414,7 +442,7 @@ void MainWindow::RefreshGameTable() { m_game_grid_frame->PopulateGameGrid(m_game_info->m_games, false); statusBar->clearMessage(); int numGames = m_game_info->m_games.size(); - QString statusMessage = "Games: " + QString::number(numGames) + ". Ready."; + QString statusMessage = "Games: " + QString::number(numGames); statusBar->showMessage(statusMessage); } @@ -576,6 +604,7 @@ void MainWindow::InstallDragDropPkg(std::filesystem::path file, int pkgNum, int void MainWindow::InstallDirectory() { GameInstallDialog dlg; dlg.exec(); + RefreshGameTable(); } void MainWindow::SetLastUsedTheme() { diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index 27d14b937..35fd0bf6c 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -9,13 +9,14 @@ #include #include #include -#include #include + #include "common/config.h" #include "common/path_util.h" #include "core/file_format/psf.h" #include "core/file_sys/fs.h" #include "elf_viewer.h" +#include "emulator.h" #include "game_grid_frame.h" #include "game_info.h" #include "game_list_frame.h" @@ -38,6 +39,7 @@ public: bool Init(); void InstallDragDropPkg(std::filesystem::path file, int pkgNum, int nPkg); void InstallDirectory(); + void StartGame(); private Q_SLOTS: void ConfigureGuiFromSettings(); @@ -52,6 +54,7 @@ private: void CreateActions(); void CreateRecentGameActions(); void CreateDockWindows(); + void GetPhysicalDevices(); void LoadGameLists(); void CreateConnects(); void SetLastUsedTheme(); @@ -77,6 +80,8 @@ private: QScopedPointer m_elf_viewer; // Status Bar. QScopedPointer statusBar; + // Available GPU devices + std::vector m_physical_devices; PSF psf; diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 858bbb07a..c89fa5a00 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -7,25 +7,6 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { QPalette themePalette; switch (theme) { - case Theme::Light: - mw_searchbar->setStyleSheet("background-color: #ffffff; /* Light gray background */" - "color: #000000; /* Black text */" - "padding: 5px;"); - themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray - themePalette.setColor(QPalette::WindowText, Qt::black); // Black - themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish - themePalette.setColor(QPalette::ToolTipBase, Qt::black); // Black - themePalette.setColor(QPalette::ToolTipText, Qt::black); // Black - themePalette.setColor(QPalette::Text, Qt::black); // Black - themePalette.setColor(QPalette::Button, QColor(240, 240, 240)); // Light gray - themePalette.setColor(QPalette::ButtonText, Qt::black); // Black - themePalette.setColor(QPalette::BrightText, Qt::red); // Red - themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Blue - themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Blue - themePalette.setColor(QPalette::HighlightedText, Qt::white); // White - qApp->setPalette(themePalette); - break; - case Theme::Dark: mw_searchbar->setStyleSheet("background-color: #1e1e1e; /* Dark background */" "color: #ffffff; /* White text */" @@ -48,6 +29,25 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; + case Theme::Light: + mw_searchbar->setStyleSheet("background-color: #ffffff; /* Light gray background */" + "color: #000000; /* Black text */" + "padding: 5px;"); + themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray + themePalette.setColor(QPalette::WindowText, Qt::black); // Black + themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish + themePalette.setColor(QPalette::ToolTipBase, Qt::black); // Black + themePalette.setColor(QPalette::ToolTipText, Qt::black); // Black + themePalette.setColor(QPalette::Text, Qt::black); // Black + themePalette.setColor(QPalette::Button, QColor(240, 240, 240)); // Light gray + themePalette.setColor(QPalette::ButtonText, Qt::black); // Black + themePalette.setColor(QPalette::BrightText, Qt::red); // Red + themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Blue + themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Blue + themePalette.setColor(QPalette::HighlightedText, Qt::white); // White + qApp->setPalette(themePalette); + break; + case Theme::Green: mw_searchbar->setStyleSheet("background-color: #354535; /* Dark green background */" "color: #ffffff; /* White text */" diff --git a/src/qt_gui/main_window_themes.h b/src/qt_gui/main_window_themes.h index 8b87fbce5..6da70e995 100644 --- a/src/qt_gui/main_window_themes.h +++ b/src/qt_gui/main_window_themes.h @@ -2,13 +2,14 @@ // SPDX-License-Identifier: GPL-2.0-or-later #pragma once + #include #include #include enum class Theme : int { - Light, Dark, + Light, Green, Blue, Violet, diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 7b5bf1816..06e5cf7fb 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -44,8 +44,8 @@ public: QAction* gameInstallPathAct; QAction* dumpGameListAct; QAction* pkgViewerAct; - QAction* setThemeLight; QAction* setThemeDark; + QAction* setThemeLight; QAction* setThemeGreen; QAction* setThemeBlue; QAction* setThemeViolet; @@ -76,7 +76,7 @@ public: MainWindow->setObjectName("MainWindow"); // MainWindow->resize(1280, 720); QIcon icon; - icon.addFile(QString::fromUtf8(":/images/shadps4.ico"), QSize(), QIcon::Normal, QIcon::Off); + icon.addFile(QString::fromUtf8(":images/shadps4.ico"), QSize(), QIcon::Normal, QIcon::Off); MainWindow->setWindowIcon(icon); QSizePolicy sizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); sizePolicy.setHorizontalStretch(0); @@ -136,13 +136,13 @@ public: pkgViewerAct->setObjectName("pkgViewer"); pkgViewerAct->setObjectName("pkgViewer"); pkgViewerAct->setIcon(QIcon(":images/file_icon.png")); - setThemeLight = new QAction(MainWindow); - setThemeLight->setObjectName("setThemeLight"); - setThemeLight->setCheckable(true); - setThemeLight->setChecked(true); setThemeDark = new QAction(MainWindow); setThemeDark->setObjectName("setThemeDark"); setThemeDark->setCheckable(true); + setThemeDark->setChecked(true); + setThemeLight = new QAction(MainWindow); + setThemeLight->setObjectName("setThemeLight"); + setThemeLight->setCheckable(true); setThemeGreen = new QAction(MainWindow); setThemeGreen->setObjectName("setThemeGreen"); setThemeGreen->setCheckable(true); @@ -285,7 +285,7 @@ public: } // setupUi void retranslateUi(QMainWindow* MainWindow) { - MainWindow->setWindowTitle(QCoreApplication::translate("MainWindow", "Shadps4", nullptr)); + MainWindow->setWindowTitle(QCoreApplication::translate("MainWindow", "shadPS4", nullptr)); addElfFolderAct->setText( QCoreApplication::translate("MainWindow", "Open/Add Elf Folder", nullptr)); bootInstallPkgAct->setText( @@ -297,7 +297,7 @@ public: menuRecent->setTitle(QCoreApplication::translate("MainWindow", "Recent Games", nullptr)); exitAct->setText(QCoreApplication::translate("MainWindow", "Exit", nullptr)); #if QT_CONFIG(tooltip) - exitAct->setToolTip(QCoreApplication::translate("MainWindow", "Exit Shadps4", nullptr)); + exitAct->setToolTip(QCoreApplication::translate("MainWindow", "Exit shadPS4", nullptr)); #endif // QT_CONFIG(tooltip) #if QT_CONFIG(statustip) exitAct->setStatusTip( @@ -332,8 +332,8 @@ public: menuSettings->setTitle(QCoreApplication::translate("MainWindow", "Settings", nullptr)); menuUtils->setTitle(QCoreApplication::translate("MainWindow", "Utils", nullptr)); menuThemes->setTitle(QCoreApplication::translate("MainWindow", "Themes", nullptr)); - setThemeLight->setText(QCoreApplication::translate("MainWindow", "Light", nullptr)); setThemeDark->setText(QCoreApplication::translate("MainWindow", "Dark", nullptr)); + setThemeLight->setText(QCoreApplication::translate("MainWindow", "Light", nullptr)); setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr)); setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr)); setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr)); diff --git a/src/qt_gui/pkg_viewer.cpp b/src/qt_gui/pkg_viewer.cpp index cf0b21679..cd2ce2b6e 100644 --- a/src/qt_gui/pkg_viewer.cpp +++ b/src/qt_gui/pkg_viewer.cpp @@ -3,6 +3,7 @@ #include #include + #include "pkg_viewer.h" PKGViewer::PKGViewer(std::shared_ptr game_info_get, QWidget* parent, diff --git a/src/qt_gui/pkg_viewer.h b/src/qt_gui/pkg_viewer.h index 0e0a87062..e040d5950 100644 --- a/src/qt_gui/pkg_viewer.h +++ b/src/qt_gui/pkg_viewer.h @@ -15,6 +15,7 @@ #include #include #include + #include "common/io_file.h" #include "core/file_format/pkg.h" #include "core/file_format/pkg_type.h" diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp new file mode 100644 index 000000000..ca47f3310 --- /dev/null +++ b/src/qt_gui/settings_dialog.cpp @@ -0,0 +1,136 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "settings_dialog.h" +#include "ui_settings_dialog.h" + +SettingsDialog::SettingsDialog(std::span physical_devices, QWidget* parent) + : QDialog(parent), ui(new Ui::SettingsDialog) { + ui->setupUi(this); + ui->tabWidgetSettings->setUsesScrollButtons(false); + const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + + ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + + // Add list of available GPUs + ui->graphicsAdapterBox->addItem("Auto Select"); // -1, auto selection + for (const auto& device : physical_devices) { + ui->graphicsAdapterBox->addItem(device); + } + + LoadValuesFromConfig(); + + connect(ui->buttonBox, &QDialogButtonBox::rejected, this, &QWidget::close); + + connect(ui->buttonBox, &QDialogButtonBox::clicked, this, + [this, config_dir](QAbstractButton* button) { + if (button == ui->buttonBox->button(QDialogButtonBox::Save)) { + Config::save(config_dir / "config.toml"); + QWidget::close(); + } else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) { + Config::save(config_dir / "config.toml"); + } else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) { + Config::setDefaultValues(); + LoadValuesFromConfig(); + } + }); + + connect(ui->tabWidgetSettings, &QTabWidget::currentChanged, this, [this]() { + ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + }); + + // GENERAL TAB + { + connect(ui->userNameLineEdit, &QLineEdit::textChanged, this, + [](const QString& text) { Config::setUserName(text.toStdString()); }); + + connect(ui->consoleLanguageComboBox, &QComboBox::currentIndexChanged, this, + [](int index) { Config::setLanguage(index); }); + + connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setFullscreenMode(val); }); + + connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setShowSplash(val); }); + + connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setNeoMode(val); }); + + connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, + [](const QString& text) { Config::setLogType(text.toStdString()); }); + + connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, + [](const QString& text) { Config::setLogFilter(text.toStdString()); }); + } + + // GPU TAB + { + // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 + // when setting and add 1 when getting to select the correct gpu in Qt + connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this, + [](int index) { Config::setGpuId(index - 1); }); + + connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setScreenWidth(val); }); + + connect(ui->heightSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setScreenHeight(val); }); + + connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setVblankDiv(val); }); + + connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setDumpShaders(val); }); + + connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setNullGpu(val); }); + + connect(ui->dumpPM4CheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setDumpPM4(val); }); + } + + // DEBUG TAB + { + connect(ui->debugDump, &QCheckBox::stateChanged, this, + [](int val) { Config::setDebugDump(val); }); + + connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setVkValidation(val); }); + + connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setVkSyncValidation(val); }); + + connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setRdocEnabled(val); }); + } +} + +void SettingsDialog::LoadValuesFromConfig() { + ui->consoleLanguageComboBox->setCurrentIndex(Config::GetLanguage()); + + ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1); + ui->widthSpinBox->setValue(Config::getScreenWidth()); + ui->heightSpinBox->setValue(Config::getScreenHeight()); + ui->vblankSpinBox->setValue(Config::vblankDiv()); + ui->dumpShadersCheckBox->setChecked(Config::dumpShaders()); + ui->nullGpuCheckBox->setChecked(Config::nullGpu()); + ui->dumpPM4CheckBox->setChecked(Config::dumpPM4()); + + ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode()); + ui->showSplashCheckBox->setChecked(Config::showSplash()); + ui->ps4proCheckBox->setChecked(Config::isNeoMode()); + ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType())); + ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter())); + ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName())); + + ui->debugDump->setChecked(Config::debugDump()); + ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled()); + ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled()); + ui->rdocCheckBox->setChecked(Config::isRdocEnabled()); +} + +int SettingsDialog::exec() { + return QDialog::exec(); +} + +SettingsDialog::~SettingsDialog() {} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h new file mode 100644 index 000000000..7d8701093 --- /dev/null +++ b/src/qt_gui/settings_dialog.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/config.h" +#include "common/path_util.h" + +namespace Ui { +class SettingsDialog; +} + +class SettingsDialog : public QDialog { + Q_OBJECT +public: + explicit SettingsDialog(std::span physical_devices, QWidget* parent = nullptr); + ~SettingsDialog(); + + int exec() override; + +private: + void LoadValuesFromConfig(); + + std::unique_ptr ui; +}; diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui new file mode 100644 index 000000000..3302f9e62 --- /dev/null +++ b/src/qt_gui/settings_dialog.ui @@ -0,0 +1,902 @@ + + + + SettingsDialog + + + Qt::WindowModality::WindowModal + + + + 0 + 0 + 1024 + 768 + + + + + 0 + 0 + + + + Settings + + + + :/images/shadps4.ico:/images/shadps4.ico + + + + + + QFrame::Shape::NoFrame + + + true + + + + true + + + + 0 + 0 + 1002 + 710 + + + + + 0 + 0 + + + + 0 + + + + General + + + + + + + + + + Emulator Settings + + + + + + 6 + + + 0 + + + + + + + Username + + + + + + + + + + + + + + + + Console Language + + + + + + + Japanese + + + + + English (United States) + + + + + French (France) + + + + + Spanish (Spain) + + + + + German + + + + + Italian + + + + + Dutch + + + + + Portuguese (Portugal) + + + + + Russian + + + + + Korean + + + + + Traditional Chinese + + + + + Simplified Chinese + + + + + Finnish + + + + + Swedish + + + + + Danish + + + + + Norwegian + + + + + Polish + + + + + Portuguese (Brazil) + + + + + English (United Kingdom) + + + + + Turkish + + + + + Spanish (Latin America) + + + + + Arabic + + + + + French (Canada) + + + + + Czech + + + + + Hungarian + + + + + Greek + + + + + Romanian + + + + + Thai + + + + + Vietnamese + + + + + Indonesian + + + + + + + + + + + Enable Fullscreen + + + + + + + Show Splash + + + + + + + Is PS4 Pro + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + + + Logger Settings + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Log Type + + + + + + + async + + + + + sync + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Log Filter + + + + + + + + + + + + + + + + + + + + + + + Additional Settings + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + 12 + + + 12 + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + GPU + + + + + + + + + + Graphics Device + + + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + 6 + + + 0 + + + + + + + Width + + + + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 0 + + + 9999 + + + 1280 + + + + + + + + + + Height + + + + + + true + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 0 + + + 9999 + + + 720 + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Vblank Divider + + + + + + true + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 1 + + + 9999 + + + 1 + + + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + 12 + + + 12 + + + + + Additional Settings + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignVCenter + + + + + + Enable Shaders Dumping + + + + + + + Enable NULL GPU + + + + + + + Enable PM4 Dumping + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + Debug + + + + + + + + true + + + General + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + Enable Debug Dumping + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + Enable Vulkan Validation Layers + + + + + + + Enable Vulkan Synchronization Validation + + + + + + + Enable RenderDoc Debugging + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + QDialogButtonBox::StandardButton::Apply|QDialogButtonBox::StandardButton::Close|QDialogButtonBox::StandardButton::RestoreDefaults|QDialogButtonBox::StandardButton::Save + + + + + + + + diff --git a/src/qt_gui/trophy_viewer.h b/src/qt_gui/trophy_viewer.h index ab79ac501..2b794593f 100644 --- a/src/qt_gui/trophy_viewer.h +++ b/src/qt_gui/trophy_viewer.h @@ -16,6 +16,7 @@ #include #include #include + #include "common/types.h" #include "core/file_format/trp.h" diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index c4fcbcfae..9fd596699 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -11,6 +11,7 @@ #include "core/libraries/pad/pad.h" #include "input/controller.h" #include "sdl_window.h" +#include "video_core/renderdoc.h" #ifdef __APPLE__ #include @@ -18,16 +19,17 @@ namespace Frontend { -WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_) +WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_, + std::string_view window_title) : width{width_}, height{height_}, controller{controller_} { if (SDL_Init(SDL_INIT_VIDEO) < 0) { UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError()); } SDL_InitSubSystem(SDL_INIT_AUDIO); - const std::string title = "shadPS4 v" + std::string(Common::VERSION); SDL_PropertiesID props = SDL_CreateProperties(); - SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, title.c_str()); + SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, + std::string(window_title).c_str()); SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_X_NUMBER, SDL_WINDOWPOS_CENTERED); SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_Y_NUMBER, SDL_WINDOWPOS_CENTERED); SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_WIDTH_NUMBER, width); @@ -41,23 +43,26 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); + SDL_InitSubSystem(SDL_INIT_GAMEPAD); + controller->TryOpenSDLController(); + #if defined(SDL_PLATFORM_WIN32) window_info.type = WindowSystemType::Windows; - window_info.render_surface = - SDL_GetProperty(SDL_GetWindowProperties(window), SDL_PROP_WINDOW_WIN32_HWND_POINTER, NULL); + window_info.render_surface = SDL_GetPointerProperty(SDL_GetWindowProperties(window), + SDL_PROP_WINDOW_WIN32_HWND_POINTER, NULL); #elif defined(SDL_PLATFORM_LINUX) if (SDL_strcmp(SDL_GetCurrentVideoDriver(), "x11") == 0) { window_info.type = WindowSystemType::X11; - window_info.display_connection = SDL_GetProperty(SDL_GetWindowProperties(window), - SDL_PROP_WINDOW_X11_DISPLAY_POINTER, NULL); + window_info.display_connection = SDL_GetPointerProperty( + SDL_GetWindowProperties(window), SDL_PROP_WINDOW_X11_DISPLAY_POINTER, NULL); window_info.render_surface = (void*)SDL_GetNumberProperty( SDL_GetWindowProperties(window), SDL_PROP_WINDOW_X11_WINDOW_NUMBER, 0); } else if (SDL_strcmp(SDL_GetCurrentVideoDriver(), "wayland") == 0) { window_info.type = WindowSystemType::Wayland; - window_info.display_connection = SDL_GetProperty( + window_info.display_connection = SDL_GetPointerProperty( SDL_GetWindowProperties(window), SDL_PROP_WINDOW_WAYLAND_DISPLAY_POINTER, NULL); - window_info.render_surface = SDL_GetProperty(SDL_GetWindowProperties(window), - SDL_PROP_WINDOW_WAYLAND_SURFACE_POINTER, NULL); + window_info.render_surface = SDL_GetPointerProperty( + SDL_GetWindowProperties(window), SDL_PROP_WINDOW_WAYLAND_SURFACE_POINTER, NULL); } #elif defined(SDL_PLATFORM_MACOS) window_info.type = WindowSystemType::Metal; @@ -71,7 +76,7 @@ void WindowSDL::waitEvent() { // Called on main thread SDL_Event event; - if (!SDL_PollEvent(&event)) { + if (!SDL_WaitEvent(&event)) { return; } @@ -90,6 +95,11 @@ void WindowSDL::waitEvent() { case SDL_EVENT_KEY_UP: onKeyPress(&event); break; + case SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + onGamepadEvent(&event); + break; case SDL_EVENT_QUIT: is_open = false; break; @@ -179,6 +189,11 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { ax = Input::GetAxis(-0x80, 0x80, axisvalue); break; case SDLK_S: + if (event->key.mod == SDL_KMOD_LCTRL) { + // Trigger rdoc capture + VideoCore::TriggerCapture(); + break; + } axis = Input::Axis::LeftY; if (event->type == SDL_EVENT_KEY_DOWN) { axisvalue += 127; @@ -269,4 +284,71 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { } } +void WindowSDL::onGamepadEvent(const SDL_Event* event) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + u32 button = 0; + Input::Axis axis = Input::Axis::AxisMax; + switch (event->type) { + case SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + button = sdlGamepadToOrbisButton(event->gbutton.button); + if (button != 0) { + controller->CheckButton(0, button, event->type == SDL_EVENT_GAMEPAD_BUTTON_DOWN); + } + break; + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + axis = event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTX ? Input::Axis::LeftX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTY ? Input::Axis::LeftY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTX ? Input::Axis::RightX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTY ? Input::Axis::RightY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFT_TRIGGER ? Input::Axis::TriggerLeft + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHT_TRIGGER ? Input::Axis::TriggerRight + : Input::Axis::AxisMax; + if (axis != Input::Axis::AxisMax) { + controller->Axis(0, axis, Input::GetAxis(-0x8000, 0x8000, event->gaxis.value)); + } + break; + } +} + +int WindowSDL::sdlGamepadToOrbisButton(u8 button) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + switch (button) { + case SDL_GAMEPAD_BUTTON_DPAD_DOWN: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_DOWN; + case SDL_GAMEPAD_BUTTON_DPAD_UP: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_UP; + case SDL_GAMEPAD_BUTTON_DPAD_LEFT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_LEFT; + case SDL_GAMEPAD_BUTTON_DPAD_RIGHT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_RIGHT; + case SDL_GAMEPAD_BUTTON_SOUTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CROSS; + case SDL_GAMEPAD_BUTTON_NORTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TRIANGLE; + case SDL_GAMEPAD_BUTTON_WEST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_SQUARE; + case SDL_GAMEPAD_BUTTON_EAST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CIRCLE; + case SDL_GAMEPAD_BUTTON_START: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_OPTIONS; + case SDL_GAMEPAD_BUTTON_TOUCHPAD: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_BACK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_LEFT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L1; + case SDL_GAMEPAD_BUTTON_RIGHT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R1; + case SDL_GAMEPAD_BUTTON_LEFT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L3; + case SDL_GAMEPAD_BUTTON_RIGHT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R3; + default: + return 0; + } +} + } // namespace Frontend diff --git a/src/sdl_window.h b/src/sdl_window.h index 6e14fbd0e..cf6c37116 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -3,9 +3,11 @@ #pragma once +#include #include "common/types.h" struct SDL_Window; +struct SDL_Gamepad; union SDL_Event; namespace Input { @@ -40,7 +42,8 @@ struct WindowSystemInfo { class WindowSDL { public: - explicit WindowSDL(s32 width, s32 height, Input::GameController* controller); + explicit WindowSDL(s32 width, s32 height, Input::GameController* controller, + std::string_view window_title); ~WindowSDL(); s32 getWidth() const { @@ -64,6 +67,9 @@ public: private: void onResize(); void onKeyPress(const SDL_Event* event); + void onGamepadEvent(const SDL_Event* event); + + int sdlGamepadToOrbisButton(u8 button); private: s32 width; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 561014a33..c70427635 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -183,6 +183,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddCapability(spv::Capability::Float16); ctx.AddCapability(spv::Capability::Int16); } + ctx.AddCapability(spv::Capability::Int64); if (info.has_storage_images) { ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); } @@ -204,8 +205,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { } else { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); } + ctx.AddCapability(spv::Capability::GroupNonUniform); if (info.uses_group_quad) { - ctx.AddCapability(spv::Capability::GroupNonUniform); ctx.AddCapability(spv::Capability::GroupNonUniformQuad); } if (info.has_discard) { @@ -217,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } - // if (program.info.stores_frag_depth) { - // ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - // } + if (info.stores.Get(IR::Attribute::Depth)) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } break; default: throw NotImplementedException("Stage {}", u32(program.info.stage)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index da29f3927..03a0a00f0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,8 +6,8 @@ namespace Shader::Backend::SPIRV { -void EmitBitCastU16F16(EmitContext&) { - UNREACHABLE_MSG("SPIR-V Instruction"); +Id EmitBitCastU16F16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U16, value); } Id EmitBitCastU32F32(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 87ffa150f..bbf259fe8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -21,6 +21,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { case VsOutput::ClipDist7: { const u32 index = u32(output) - u32(VsOutput::ClipDist0); const Id clip_num{ctx.ConstU32(index)}; + ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined"); return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); } case VsOutput::CullDist0: @@ -33,6 +34,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { case VsOutput::CullDist7: { const u32 index = u32(output) - u32(VsOutput::CullDist0); const Id cull_num{ctx.ConstU32(index)}; + ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined"); return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num); } default: @@ -120,11 +122,13 @@ void EmitGetGotoVariable(EmitContext&) { } Id EmitReadConst(EmitContext& ctx) { + return ctx.u32_zero_value; UNREACHABLE_MSG("Unreachable instruction"); } Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { - const auto& buffer = ctx.buffers[handle]; + auto& buffer = ctx.buffers[handle]; + index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; return ctx.OpLoad(buffer.data_types->Get(1), ptr); } @@ -136,7 +140,7 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) { Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { return ctx.OpLoad( ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), - ctx.instance_step_rates, + ctx.push_data_block, rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); } @@ -149,6 +153,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { // Attribute is disabled or varying component is not written return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); } + if (param.is_default) { + return ctx.OpCompositeExtract(param.component_type, param.id, comp); + } if (param.num_components > 1) { const Id pointer{ @@ -207,8 +214,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { + if (attr == IR::Attribute::Position1) { + LOG_WARNING(Render_Vulkan, "Ignoring pos1 export"); + return; + } const Id pointer{OutputAttrPointer(ctx, attr, element)}; - ctx.OpStore(pointer, value); + ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); } Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { @@ -217,7 +228,8 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { template static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { - const auto& buffer = ctx.buffers[handle]; + auto& buffer = ctx.buffers[handle]; + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; @@ -310,7 +322,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs } static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) { - const auto& buffer = ctx.buffers[handle]; + auto& buffer = ctx.buffers[handle]; const auto format = buffer.buffer.GetDataFmt(); switch (format) { case AmdGpu::DataFormat::FormatInvalid: @@ -371,19 +383,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com if (is_signed) { value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertSToF(ctx.F32[1], value); } else { value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertUToF(ctx.F32[1], value); - } - } else { - if (is_signed) { - value = ctx.OpConvertSToF(ctx.F32[1], value); - } else { - value = ctx.OpConvertUToF(ctx.F32[1], value); } } + value = ctx.OpBitcast(ctx.F32[1], value); return ConvertValue(ctx, value, num_format, bit_width); } break; @@ -395,6 +400,8 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com template static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + auto& buffer = ctx.buffers[handle]; + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); if constexpr (N == 1) { return GetBufferFormatValue(ctx, handle, address, 0); } else { @@ -424,7 +431,8 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad template static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { - const auto& buffer = ctx.buffers[handle]; + auto& buffer = ctx.buffers[handle]; + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; @@ -459,4 +467,96 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address EmitStoreBufferF32xN<1>(ctx, handle, address, value); } +static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, + u32 bit_width) { + switch (format) { + case AmdGpu::NumberFormat::Unorm: + return ctx.OpConvertFToU( + ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))))); + case AmdGpu::NumberFormat::Uint: + return ctx.OpBitcast(ctx.U32[1], value); + case AmdGpu::NumberFormat::Float: + return value; + default: + UNREACHABLE_MSG("Unsupported number fromat for conversion: {}", + magic_enum::enum_name(format)); + } +} + +template +static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { + auto& buffer = ctx.buffers[handle]; + const auto format = buffer.buffer.GetDataFmt(); + const auto num_format = buffer.buffer.GetNumberFmt(); + + switch (format) { + case AmdGpu::DataFormat::FormatInvalid: + return; + case AmdGpu::DataFormat::Format8_8_8_8: + case AmdGpu::DataFormat::Format16: + case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format32_32_32_32: { + ASSERT(N == AmdGpu::NumComponents(format)); + + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + + Id packed_value{}; + for (u32 i = 0; i < N; i++) { + const u32 bit_width = AmdGpu::ComponentBits(format, i); + const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32; + + const Id comp{ConvertF32ToFormat( + ctx, N == 1 ? value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format, + bit_width)}; + + if (bit_width == 32) { + if constexpr (N == 1) { + ctx.OpStore(ptr, comp); + } else { + const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, + ctx.u32_zero_value, index_i); + ctx.OpStore(ptr, comp); + } + } else { + if (i == 0) { + packed_value = comp; + } else { + packed_value = + ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp, + ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width)); + } + + if (i == N - 1) { + ctx.OpStore(ptr, packed_value); + } + } + } + } break; + default: + UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format)); + } +} + +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index ede592e0d..945fa6877 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -259,4 +259,8 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value) { return ctx.OpUConvert(ctx.U16, value); } +Id EmitConvertU32U16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 911983a41..e822eabef 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) { return ctx.OpIsInf(ctx.U1[1], value); } +void EmitFPCmpClass32(EmitContext&) { + UNREACHABLE(); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 17def57ab..5526e5411 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -17,87 +17,133 @@ struct ImageOperands { operands.push_back(value); } + void AddOffset(EmitContext& ctx, const IR::Value& offset, + bool can_use_runtime_offsets = false) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + const s32 operand = offset.U32(); + Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand)); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()))); + return; + default: + break; + } + } + if (can_use_runtime_offsets) { + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } else { + LOG_WARNING(Render_Vulkan, + "Runtime offset provided to unsupported image sample instruction"); + } + } + spv::ImageOperandsMask mask{}; boost::container::static_vector operands; }; -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset) { +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset) { +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.AddOffset(ctx, offset); + return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, + operands.operands); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset) { + Id bias, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); + return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset) { + Id lod, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + ImageOperands operands; + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); + return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) { +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const u32 comp = inst->Flags().gather_comp.Value(); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); - operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + operands.AddOffset(ctx, offset); return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, operands.operands); } -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref) { +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.AddOffset(ctx, offset); return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, operands.operands); } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); - if (Sirit::ValidId(lod)) { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, - spv::ImageOperandsMask::Lod, lod)); - } else { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords)); - } + ImageOperands operands; + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); + return ctx.OpBitcast( + ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { @@ -106,8 +152,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod const auto type = ctx.info.images[handle & 0xFFFF].type; const Id zero = ctx.u32_zero_value; const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; - const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && - type != AmdGpu::ImageType::Buffer}; + const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa}; const auto query{[&](Id type) { return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) : ctx.OpImageQuerySize(type, image); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e2b411e47..8a0fcd4b8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx); void EmitSetSccLo(EmitContext& ctx); void EmitSetVccLo(EmitContext& ctx); void EmitSetVccHi(EmitContext& ctx); +void EmitFPCmpClass32(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); @@ -75,6 +76,10 @@ void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); @@ -92,15 +97,9 @@ Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset); Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); @@ -148,7 +147,7 @@ Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU16F16(EmitContext& ctx, Id value); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); Id EmitBitCastF16U16(EmitContext& ctx, Id value); @@ -258,6 +257,7 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitSMulExt(EmitContext& ctx, Id a, Id b); Id EmitUMulExt(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitIMul64(EmitContext& ctx, Id a, Id b); Id EmitSDiv32(EmitContext& ctx, Id a, Id b); Id EmitUDiv32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); @@ -271,6 +271,7 @@ Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); @@ -280,14 +281,17 @@ Id EmitBitCount32(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value); Id EmitFindSMsb32(EmitContext& ctx, Id value); Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitFindILsb32(EmitContext& ctx, Id value); Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitUMin32(EmitContext& ctx, Id a, Id b); Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitUMax32(EmitContext& ctx, Id a, Id b); Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); @@ -349,20 +353,22 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertU16U32(EmitContext& ctx, Id value); +Id EmitConvertU32U16(EmitContext& ctx, Id value); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, - Id offset); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, + const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset); + Id bias, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms); + Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -383,6 +389,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitLaneId(EmitContext& ctx); +Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index d5a0f2767..f20c4fac2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -84,6 +84,10 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } +Id EmitIMul64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U64, a, b); +} + Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { return ctx.OpSDiv(ctx.U32[1], a, b); } @@ -142,6 +146,13 @@ Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return result; } +Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U64, a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; SetZeroFlag(ctx, inst, result); @@ -187,6 +198,10 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) { return ctx.OpFindUMsb(ctx.U32[1], value); } +Id EmitFindILsb32(EmitContext& ctx, Id value) { + return ctx.OpFindILsb(ctx.U32[1], value); +} + Id EmitSMin32(EmitContext& ctx, Id a, Id b) { return ctx.OpSMin(ctx.U32[1], a, b); } @@ -231,11 +246,19 @@ Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { return result; } -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); } -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThan(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 1582d9dd2..57ea476f1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -5,99 +5,25 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" namespace Shader::Backend::SPIRV { -namespace { -Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { - const Id shift_id{ctx.ConstU32(shift)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); -} -Id Word(EmitContext& ctx, Id offset) { +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } -std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { - const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))}; - const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))}; - const Id count_id{ctx.ConstU32(count)}; - return {bit, count_id}; -} -} // Anonymous namespace - -Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; - return ctx.OpLoad(ctx.U32[1], pointer); - } else { - return Word(ctx, offset); - } -} - Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - return ctx.OpLoad(ctx.U32[2], pointer); - } else { - const Id shift_id{ctx.ConstU32(2U)}; - const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; - const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; - const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; - return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), - ctx.OpLoad(ctx.U32[1], rhs_pointer)); - } + const Id shift_id{ctx.ConstU32(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); } Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - return ctx.OpLoad(ctx.U32[4], pointer); - } const Id shift_id{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; std::array values{}; @@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { return ctx.OpCompositeConstruct(ctx.U32[4], values); } -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); -} - -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); -} - void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { - Id pointer{}; - if (ctx.profile.support_explicit_workgroup_layout) { - pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); - } else { - const Id shift{ctx.ConstU32(2U)}; - const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); - } + const Id shift{ctx.ConstU32(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); ctx.OpStore(pointer, value); } void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))}; @@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { } void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 891e41df7..3ed89692b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -6,7 +6,9 @@ namespace Shader::Backend::SPIRV { -void EmitPrologue(EmitContext& ctx) {} +void EmitPrologue(EmitContext& ctx) { + ctx.DefineBufferOffsets(); +} void EmitEpilogue(EmitContext& ctx) {} diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index a17515887..38afd90f1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) { return ctx.ConstU32(static_cast(spv::Scope::Subgroup)); } +Id EmitWarpId(EmitContext& ctx) { + UNREACHABLE(); +} + Id EmitLaneId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 16c10f53c..4b732ecd4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -46,10 +46,10 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin stage{program.info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); - DefineInterfaces(program); - DefineBuffers(info); - DefineImagesAndSamplers(info); - DefineSharedMemory(info); + DefineInterfaces(); + DefineBuffers(); + DefineImagesAndSamplers(); + DefineSharedMemory(); } EmitContext::~EmitContext() = default; @@ -86,6 +86,7 @@ void EmitContext::DefineArithmeticTypes() { F32[1] = Name(TypeFloat(32), "f32_id"); S32[1] = Name(TypeSInt(32), "i32_id"); U32[1] = Name(TypeUInt(32), "u32_id"); + U64 = Name(TypeUInt(64), "u64_id"); for (u32 i = 2; i <= 4; i++) { if (info.uses_fp16) { @@ -116,9 +117,10 @@ void EmitContext::DefineArithmeticTypes() { full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2"); } -void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); - DefineOutputs(program.info); +void EmitContext::DefineInterfaces() { + DefinePushDataBlock(); + DefineInputs(); + DefineOutputs(); } Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { @@ -126,6 +128,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::SnormNz: return ctx.F32[4]; case AmdGpu::NumberFormat::Sint: return ctx.S32[4]; @@ -146,6 +149,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::SnormNz: return {id, input_f32, F32[1], 4}; case AmdGpu::NumberFormat::Uint: return {id, input_u32, U32[1], 4}; @@ -161,6 +165,20 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f throw InvalidArgument("Invalid attribute type {}", fmt); } +void EmitContext::DefineBufferOffsets() { + for (auto& buffer : buffers) { + const u32 binding = buffer.binding; + const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 comp = (binding & 0xf) >> 2; + const u32 offset = (binding & 0x3) << 3; + const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), + push_data_block, ConstU32(half), ConstU32(comp))}; + const Id value{OpLoad(U32[1], ptr)}; + buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); + buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); + } +} + Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { switch (default_value) { case 0: @@ -176,24 +194,13 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs() { switch (stage) { case Stage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); - // Create push constants block for instance steps rates - const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")}; - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "sr0"); - MemberName(struct_type, 1, "sr1"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); - instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant); - Name(instance_step_rates, "step_rates"); - interfaces.push_back(instance_step_rates); - for (const auto& input : info.vs_inputs) { const Id type{GetAttributeType(*this, input.fmt)}; if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || @@ -204,7 +211,9 @@ void EmitContext::DefineInputs(const Info& info) { : 1; // Note that we pass index rather than Id input_params[input.binding] = { - rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf, + rate_idx, input_u32, + U32[1], input.num_components, + false, input.instance_data_buf, }; } else { Id id{DefineInput(type, input.binding)}; @@ -220,19 +229,17 @@ void EmitContext::DefineInputs(const Info& info) { break; } case Stage::Fragment: - if (info.uses_group_quad) { - subgroup_local_invocation_id = DefineVariable( - U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); - Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); - } + subgroup_local_invocation_id = DefineVariable( + U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); + Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); for (const auto& input : info.ps_inputs) { const u32 semantic = input.param_index; if (input.is_default) { - input_params[semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, - F32[1]}; + input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1], + F32[1], 4, true}; continue; } const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; @@ -257,19 +264,20 @@ void EmitContext::DefineInputs(const Info& info) { } } -void EmitContext::DefineOutputs(const Info& info) { +void EmitContext::DefineOutputs() { switch (stage) { case Stage::Vertex: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); - const std::array zero{f32_zero_value, f32_zero_value, f32_zero_value, - f32_zero_value, f32_zero_value, f32_zero_value, - f32_zero_value, f32_zero_value}; - const Id type{TypeArray(F32[1], ConstU32(8U))}; - const Id initializer{ConstantComposite(type, zero)}; - clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output, - initializer); - cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output, - initializer); + const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || + info.stores.Get(IR::Attribute::Position2) || + info.stores.Get(IR::Attribute::Position3); + if (has_extra_pos_stores) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + clip_distances = + DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output); + cull_distances = + DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); + } for (u32 i = 0; i < IR::NumParams; i++) { const IR::Attribute param{IR::Attribute::Param0 + i}; if (!info.stores.GetAny(param)) { @@ -301,12 +309,31 @@ void EmitContext::DefineOutputs(const Info& info) { } } -void EmitContext::DefineBuffers(const Info& info) { +void EmitContext::DefinePushDataBlock() { + // Create push constants block for instance steps rates + const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")}; + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "sr0"); + MemberName(struct_type, 1, "sr1"); + MemberName(struct_type, 2, "buf_offsets0"); + MemberName(struct_type, 3, "buf_offsets1"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); + MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U); + MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U); + push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); + Name(push_data_block, "push_data"); + interfaces.push_back(push_data_block); +} + +void EmitContext::DefineBuffers() { boost::container::small_vector type_ids; for (u32 i = 0; const auto& buffer : info.buffers) { const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; - const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))}; + const Id record_array_type{buffer.is_storage + ? TypeRuntimeArray(data_type) + : TypeArray(data_type, ConstU32(buffer.length))}; const Id struct_type{TypeStruct(record_array_type)}; if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); @@ -319,8 +346,8 @@ void EmitContext::DefineBuffers(const Info& info) { Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + type_ids.push_back(record_array_type); } - type_ids.push_back(record_array_type); const auto storage_class = buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform; @@ -331,9 +358,9 @@ void EmitContext::DefineBuffers(const Info& info) { Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("{}_{}", buffer.is_storage ? "ssbo" : "cbuf", buffer.sgpr_base)); - binding++; buffers.push_back({ .id = id, + .binding = binding++, .data_types = data_types, .pointer_type = pointer_type, .buffer = buffer.GetVsharp(info), @@ -380,6 +407,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rgba16f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::Rgba16; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 && image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { return spv::ImageFormat::R8; @@ -388,7 +419,20 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { return spv::ImageFormat::Rgba8; } - UNREACHABLE(); + if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::Rgba8ui; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::R11fG11fB10f; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::Rgba32f; + } + UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}", image.GetDataFmt(), + image.GetNumberFmt()); } Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { @@ -408,15 +452,13 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); case AmdGpu::ImageType::Cube: return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format); - case AmdGpu::ImageType::Buffer: - throw NotImplementedException("Image buffer"); default: break; } throw InvalidArgument("Invalid texture type {}", desc.type); } -void EmitContext::DefineImagesAndSamplers(const Info& info) { +void EmitContext::DefineImagesAndSamplers() { for (const auto& image_desc : info.images) { const VectorIds* data_types = [&] { switch (image_desc.nfmt) { @@ -467,47 +509,17 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) { } } -void EmitContext::DefineSharedMemory(const Info& info) { - if (info.shared_memory_size == 0) { +void EmitContext::DefineSharedMemory() { + static constexpr size_t DefaultSharedMemSize = 16_KB; + if (!info.uses_shared) { return; } - const auto make{[&](Id element_type, u32 element_size) { - const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)}; - const Id array_type{TypeArray(element_type, ConstU32(num_elements))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); - Decorate(struct_type, spv::Decoration::Block); - - const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; - const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; - const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; - Decorate(variable, spv::Decoration::Aliased); - interfaces.push_back(variable); - - return std::make_tuple(variable, element_pointer, pointer); - }}; - if (profile.support_explicit_workgroup_layout) { - AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); - if (info.uses_shared_u8) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); - } - if (info.uses_shared_u16) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); - } - std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); - return; + if (info.shared_memory_size == 0) { + info.shared_memory_size = DefaultSharedMemSize; } const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], ConstU32(num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); - shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index fc6783442..81237a9a3 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -40,6 +40,7 @@ public: ~EmitContext(); Id Def(const IR::Value& value); + void DefineBufferOffsets(); [[nodiscard]] Id DefineInput(Id type, u32 location) { const Id input_id{DefineVar(type, spv::StorageClass::Input)}; @@ -168,7 +169,7 @@ public: Id output_position{}; Id vertex_index{}; Id instance_id{}; - Id instance_step_rates{}; + Id push_data_block{}; Id base_vertex{}; Id frag_coord{}; Id front_facing{}; @@ -201,14 +202,17 @@ public: struct BufferDefinition { Id id; + Id offset; + Id offset_dwords; + u32 binding; const VectorIds* data_types; Id pointer_type; AmdGpu::Buffer buffer; }; u32& binding; - boost::container::small_vector buffers; - boost::container::small_vector images; + boost::container::small_vector buffers; + boost::container::small_vector images; boost::container::small_vector samplers; Id sampler_type{}; @@ -219,6 +223,7 @@ public: Id pointer_type; Id component_type; u32 num_components; + bool is_default{}; s32 buffer_handle{-1}; }; std::array input_params{}; @@ -226,12 +231,13 @@ public: private: void DefineArithmeticTypes(); - void DefineInterfaces(const IR::Program& program); - void DefineInputs(const Info& info); - void DefineOutputs(const Info& info); - void DefineBuffers(const Info& info); - void DefineImagesAndSamplers(const Info& info); - void DefineSharedMemory(const Info& info); + void DefineInterfaces(); + void DefineInputs(); + void DefineOutputs(); + void DefinePushDataBlock(); + void DefineBuffers(); + void DefineImagesAndSamplers(); + void DefineSharedMemory(); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 59dc8850c..a2dedfdfd 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) { } } -CFG::CFG(ObjectPool& block_pool_, std::span inst_list_) +CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) : block_pool{block_pool_}, inst_list{inst_list_} { index_to_pc.resize(inst_list.size() + 1); EmitLabels(); diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index 071900871..ebe614ee5 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -8,10 +8,10 @@ #include #include +#include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/condition.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Gcn { @@ -49,7 +49,7 @@ class CFG { using Label = u32; public: - explicit CFG(ObjectPool& block_pool, std::span inst_list); + explicit CFG(Common::ObjectPool& block_pool, std::span inst_list); [[nodiscard]] std::string Dot() const; @@ -59,7 +59,7 @@ private: void LinkBlocks(); public: - ObjectPool& block_pool; + Common::ObjectPool& block_pool; std::span inst_list; std::vector index_to_pc; boost::container::small_vector labels; diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 634566fa8..8df3ac364 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1479,7 +1479,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 337 = V_MIN3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 338 = V_MIN3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, @@ -1488,7 +1488,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 340 = V_MAX3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 341 = V_MAX3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, @@ -1497,7 +1497,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 343 = V_MED3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 344 = V_MED3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, @@ -2779,11 +2779,9 @@ constexpr std::array InstructionFormatDS = {{ // 60 = DS_READ_U16 {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 61 = DS_CONSUME - {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 62 = DS_APPEND - {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 63 = DS_ORDERED_COUNT {InstClass::GdsOrdCnt, InstCategory::DataShare, 3, 1, ScalarType::Undefined, ScalarType::Undefined}, diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h index d1d10efb6..f83f43db5 100644 --- a/src/shader_recompiler/frontend/instruction.h +++ b/src/shader_recompiler/frontend/instruction.h @@ -76,11 +76,11 @@ struct SMRD { }; struct InstControlSOPK { - BitField<0, 16, u32> simm; + s16 simm; }; struct InstControlSOPP { - BitField<0, 16, u32> simm; + s16 simm; }; struct InstControlVOP3 { diff --git a/src/shader_recompiler/frontend/module.h b/src/shader_recompiler/frontend/module.h deleted file mode 100644 index 3901f0219..000000000 --- a/src/shader_recompiler/frontend/module.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -namespace Shader::Gcn { - -void Translate(); - -} // namespace Shader::Gcn \ No newline at end of file diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h index d38140d8f..cdc1e4746 100644 --- a/src/shader_recompiler/frontend/opcodes.h +++ b/src/shader_recompiler/frontend/opcodes.h @@ -2392,10 +2392,10 @@ enum class OperandField : u32 { ConstFloatPos_4_0, ConstFloatNeg_4_0, VccZ = 251, - ExecZ, - Scc, - LdsDirect, - LiteralConst, + ExecZ = 252, + Scc = 253, + LdsDirect = 254, + LiteralConst = 255, VectorGPR, Undefined = 0xFFFFFFFF, diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 346f00aa4..b50205d46 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { */ class GotoPass { public: - explicit GotoPass(CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { + explicit GotoPass(CFG& cfg, Common::ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; const auto end{gotos.rend()}; for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { @@ -563,7 +563,7 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& pool; + Common::ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -597,16 +597,17 @@ private: class TranslatePass { public: - TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, + TranslatePass(Common::ObjectPool& inst_pool_, + Common::ObjectPool& block_pool_, + Common::ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Info& info_) + Info& info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - Translator{&first_block, info}.EmitPrologue(); + Translator{&first_block, info, profile}.EmitPrologue(); } private: @@ -635,7 +636,7 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), - info); + info, profile); } break; } @@ -808,23 +809,26 @@ private: return block_pool.Create(inst_pool); } - ObjectPool& stmt_pool; - ObjectPool& inst_pool; - ObjectPool& block_pool; + Common::ObjectPool& stmt_pool; + Common::ObjectPool& inst_pool; + Common::ObjectPool& block_pool; IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; + const Profile& profile; }; } // Anonymous namespace -IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Info& info) { - ObjectPool stmt_pool{64}; +IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, CFG& cfg, Info& info, + const Profile& profile) { + Common::ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, + syntax_list, cfg.inst_list, info, profile}; ASSERT_MSG(!info.translation_failed, "Shader translation has failed"); return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index 09814349c..f5a540518 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -7,16 +7,16 @@ #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/value.h" -#include "shader_recompiler/object_pool.h" namespace Shader { struct Info; -} +struct Profile; +} // namespace Shader namespace Shader::Gcn { -[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, - ObjectPool& block_pool, CFG& cfg, - Info& info); +[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, CFG& cfg, + Info& info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c5d9f0ecd..532e024e2 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -5,6 +5,31 @@ namespace Shader::Gcn { +void Translator::EmitDataShare(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::DS_SWIZZLE_B32: + return DS_SWIZZLE_B32(inst); + case Opcode::DS_READ_B32: + return DS_READ(32, false, false, inst); + case Opcode::DS_READ_B64: + return DS_READ(64, false, false, inst); + case Opcode::DS_READ2_B32: + return DS_READ(32, false, true, inst); + case Opcode::DS_READ2_B64: + return DS_READ(64, false, true, inst); + case Opcode::DS_WRITE_B32: + return DS_WRITE(32, false, false, inst); + case Opcode::DS_WRITE_B64: + return DS_WRITE(64, false, false, inst); + case Opcode::DS_WRITE2_B32: + return DS_WRITE(32, false, true, inst); + case Opcode::DS_WRITE2_B64: + return DS_WRITE(64, false, true, inst); + default: + LogMissingOpcode(inst); + } +} + void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { const u8 offset0 = inst.control.ds.offset0; const u8 offset1 = inst.control.ds.offset1; @@ -20,20 +45,34 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) { const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; + IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { - // Pair loads are either 32 or 64-bit. We assume 32-bit for now. - ASSERT(bit_size == 32); - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)}); - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); - ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)}); + // Pair loads are either 32 or 64-bit + const u32 adj = bit_size == 32 ? 4 : 8; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); + const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0); + if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data0}); + } else { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)}); + } + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); + const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); + if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data1}); + } else { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)}); + } } else if (bit_size == 64) { - const IR::Value data = ir.LoadShared(bit_size, is_signed, addr); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); } else { - const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)}; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; ir.SetVectorReg(dst_reg, data); } } @@ -43,17 +82,31 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI const IR::VectorReg data0{inst.src[1].code}; const IR::VectorReg data1{inst.src[2].code}; if (is_pair) { - ASSERT(bit_size == 32); - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - ir.WriteShared(32, ir.GetVectorReg(data0), addr0); - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); - ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + const u32 adj = bit_size == 32 ? 4 : 8; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + } else { + ir.WriteShared( + 64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), + addr0); + } + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + } else { + ir.WriteShared( + 64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)), + addr1); + } } else if (bit_size == 64) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); - ir.WriteShared(bit_size, data, addr); + ir.WriteShared(bit_size, data, addr0); } else { - ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } } @@ -62,7 +115,18 @@ void Translator::S_BARRIER() { } void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { - UNREACHABLE(); + ASSERT(info.stage != Stage::Compute); + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::V_READLANE_B32(const GcnInst& inst) { + ASSERT(info.stage != Stage::Compute); + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::V_WRITELANE_B32(const GcnInst& inst) { + ASSERT(info.stage != Stage::Compute); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 518405373..889de21b7 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -6,7 +6,7 @@ namespace Shader::Gcn { -void Translator::EXP(const GcnInst& inst) { +void Translator::EmitExport(const GcnInst& inst) { if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) { LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation"); ir.Discard(ir.LogicalNot(ir.GetExec())); diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index a20e91ca1..795b148d6 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -5,8 +5,102 @@ namespace Shader::Gcn { +void Translator::EmitScalarAlu(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_MOVK_I32: + return S_MOVK(inst); + case Opcode::S_MOV_B32: + return S_MOV(inst); + case Opcode::S_MUL_I32: + return S_MUL_I32(inst); + case Opcode::S_AND_SAVEEXEC_B64: + return S_AND_SAVEEXEC_B64(inst); + case Opcode::S_MOV_B64: + return S_MOV_B64(inst); + case Opcode::S_CMP_LT_U32: + return S_CMP(ConditionOp::LT, false, inst); + case Opcode::S_CMP_LE_U32: + return S_CMP(ConditionOp::LE, false, inst); + case Opcode::S_CMP_LG_U32: + return S_CMP(ConditionOp::LG, false, inst); + case Opcode::S_CMP_LT_I32: + return S_CMP(ConditionOp::LT, true, inst); + case Opcode::S_CMP_LG_I32: + return S_CMP(ConditionOp::LG, true, inst); + case Opcode::S_CMP_GT_I32: + return S_CMP(ConditionOp::GT, true, inst); + case Opcode::S_CMP_GE_I32: + return S_CMP(ConditionOp::GE, true, inst); + case Opcode::S_CMP_EQ_I32: + return S_CMP(ConditionOp::EQ, true, inst); + case Opcode::S_CMP_EQ_U32: + return S_CMP(ConditionOp::EQ, false, inst); + case Opcode::S_CMP_GE_U32: + return S_CMP(ConditionOp::GE, false, inst); + case Opcode::S_CMP_GT_U32: + return S_CMP(ConditionOp::GT, false, inst); + case Opcode::S_OR_B64: + return S_OR_B64(NegateMode::None, false, inst); + case Opcode::S_NOR_B64: + return S_OR_B64(NegateMode::Result, false, inst); + case Opcode::S_XOR_B64: + return S_OR_B64(NegateMode::None, true, inst); + case Opcode::S_ORN2_B64: + return S_OR_B64(NegateMode::Src1, false, inst); + case Opcode::S_AND_B64: + return S_AND_B64(NegateMode::None, inst); + case Opcode::S_NAND_B64: + return S_AND_B64(NegateMode::Result, inst); + case Opcode::S_ANDN2_B64: + return S_AND_B64(NegateMode::Src1, inst); + case Opcode::S_NOT_B64: + return S_NOT_B64(inst); + case Opcode::S_ADD_I32: + return S_ADD_I32(inst); + case Opcode::S_AND_B32: + return S_AND_B32(inst); + case Opcode::S_ASHR_I32: + return S_ASHR_I32(inst); + case Opcode::S_OR_B32: + return S_OR_B32(inst); + case Opcode::S_LSHL_B32: + return S_LSHL_B32(inst); + case Opcode::S_LSHR_B32: + return S_LSHR_B32(inst); + case Opcode::S_CSELECT_B32: + return S_CSELECT_B32(inst); + case Opcode::S_CSELECT_B64: + return S_CSELECT_B64(inst); + case Opcode::S_BFE_U32: + return S_BFE_U32(inst); + case Opcode::S_BFM_B32: + return S_BFM_B32(inst); + case Opcode::S_BREV_B32: + return S_BREV_B32(inst); + case Opcode::S_ADD_U32: + return S_ADD_U32(inst); + case Opcode::S_ADDC_U32: + return S_ADDC_U32(inst); + case Opcode::S_ADDK_I32: + return S_ADDK_I32(inst); + case Opcode::S_MULK_I32: + return S_MULK_I32(inst); + case Opcode::S_SUB_U32: + case Opcode::S_SUB_I32: + return S_SUB_U32(inst); + case Opcode::S_MIN_U32: + return S_MIN_U32(inst); + case Opcode::S_MAX_U32: + return S_MAX_U32(inst); + case Opcode::S_WQM_B64: + break; + default: + LogMissingOpcode(inst); + } +} + void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm.Value(); + const auto simm16 = inst.control.sopk.simm; if (simm16 & (1 << 15)) { // TODO: need to verify the case of imm sign extension UNREACHABLE(); @@ -14,6 +108,16 @@ void Translator::S_MOVK(const GcnInst& inst) { SetDst(inst.dst[0], ir.Imm32(simm16)); } +void Translator::S_ADDK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + +void Translator::S_MULK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + void Translator::S_MOV(const GcnInst& inst) { SetDst(inst.dst[0], GetSrc(inst.src[0])); } @@ -62,15 +166,10 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { } }(); - // Mark destination SPGR as an EXEC context. This means we will use 1-bit - // IR instruction whenever it's loaded. switch (inst.dst[0].field) { - case OperandField::ScalarGPR: { - const u32 reg = inst.dst[0].code; - exec_contexts[reg] = true; - ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec); + case OperandField::ScalarGPR: + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec); break; - } case OperandField::VccLo: ir.SetVcc(exec); break; @@ -79,27 +178,37 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { } // Update EXEC. - ir.SetExec(ir.LogicalAnd(exec, src)); + const IR::U1 result = ir.LogicalAnd(exec, src); + ir.SetExec(result); + ir.SetScc(result); } void Translator::S_MOV_B64(const GcnInst& inst) { - // TODO: Using VCC as EXEC context. - if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) { - return; - } - if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) { - // Exec context push - exec_contexts[inst.dst[0].code] = true; - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec()); - } else if (inst.dst[0].field == OperandField::ExecLo && - inst.src[0].field == OperandField::ScalarGPR) { - // Exec context pop - exec_contexts[inst.src[0].code] = false; - ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); - } else if (inst.dst[0].field == OperandField::ExecLo && - inst.src[0].field == OperandField::ConstZero) { - ir.SetExec(ir.Imm1(false)); - } else { + const IR::U1 src = [&] { + switch (inst.src[0].field) { + case OperandField::VccLo: + return ir.GetVcc(); + case OperandField::ExecLo: + return ir.GetExec(); + case OperandField::ScalarGPR: + return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::ConstZero: + return ir.Imm1(false); + default: + UNREACHABLE(); + } + }(); + switch (inst.dst[0].field) { + case OperandField::ScalarGPR: + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src); + break; + case OperandField::ExecLo: + ir.SetExec(src); + break; + case OperandField::VccLo: + ir.SetVcc(src); + break; + default: UNREACHABLE(); } } @@ -338,4 +447,20 @@ void Translator::S_ADDC_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo())); } +void Translator::S_MAX_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result = ir.UMax(src0, src1); + SetDst(inst.dst[0], result); + ir.SetScc(ir.IEqual(result, src0)); +} + +void Translator::S_MIN_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 result = ir.UMin(src0, src1); + SetDst(inst.dst[0], result); + ir.SetScc(ir.IEqual(result, src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 3c80764c4..29f2acc27 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -7,6 +7,29 @@ namespace Shader::Gcn { static constexpr u32 SQ_SRC_LITERAL = 0xFF; +void Translator::EmitScalarMemory(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_LOAD_DWORDX4: + return S_LOAD_DWORD(4, inst); + case Opcode::S_LOAD_DWORDX8: + return S_LOAD_DWORD(8, inst); + case Opcode::S_LOAD_DWORDX16: + return S_LOAD_DWORD(16, inst); + case Opcode::S_BUFFER_LOAD_DWORD: + return S_BUFFER_LOAD_DWORD(1, inst); + case Opcode::S_BUFFER_LOAD_DWORDX2: + return S_BUFFER_LOAD_DWORD(2, inst); + case Opcode::S_BUFFER_LOAD_DWORDX4: + return S_BUFFER_LOAD_DWORD(4, inst); + case Opcode::S_BUFFER_LOAD_DWORDX8: + return S_BUFFER_LOAD_DWORD(8, inst); + case Opcode::S_BUFFER_LOAD_DWORDX16: + return S_BUFFER_LOAD_DWORD(16, inst); + default: + LogMissingOpcode(inst); + } +} + void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; const u32 dword_offset = [&] -> u32 { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 9e67e82e5..d48e4defd 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -16,13 +16,10 @@ namespace Shader::Gcn { -std::array Translator::exec_contexts{}; - -Translator::Translator(IR::Block* block_, Info& info_) - : ir{*block_, block_->begin()}, info{info_} {} +Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_) + : ir{*block_, block_->begin()}, info{info_}, profile{profile_} {} void Translator::EmitPrologue() { - exec_contexts.fill(false); ir.Prologue(); ir.SetExec(ir.Imm1(true)); @@ -76,103 +73,203 @@ void Translator::EmitPrologue() { } } -IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - // Input modifiers work on float values. - force_flt |= operand.input_modifier.abs | operand.input_modifier.neg; +template +T Translator::GetSrc(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; - IR::U32F32 value{}; + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return ir.Imm32(std::bit_cast(value)); + } else { + return ir.Imm32(std::bit_cast(value)); + } + }; + + T value{}; switch (operand.field) { case OperandField::ScalarGPR: - if (operand.type == ScalarType::Float32 || force_flt) { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } else { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } + value = ir.GetScalarReg(IR::ScalarReg(operand.code)); break; case OperandField::VectorGPR: - if (operand.type == ScalarType::Float32 || force_flt) { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } else { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } + value = ir.GetVectorReg(IR::VectorReg(operand.code)); break; case OperandField::ConstZero: - if (force_flt) { - value = ir.Imm32(0.f); - } else { - value = ir.Imm32(0U); - } + value = get_imm(0U); break; case OperandField::SignedConstIntPos: - ASSERT(!force_flt); - value = ir.Imm32(operand.code - SignedConstIntPosMin + 1); + value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: - ASSERT(!force_flt); - value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1); + value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: - if (force_flt) { - value = ir.Imm32(std::bit_cast(operand.code)); - } else { - value = ir.Imm32(operand.code); - } + value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: - if (force_flt) { - value = ir.Imm32(1.f); - } else { - value = ir.Imm32(std::bit_cast(1.f)); - } + value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: - value = ir.Imm32(0.5f); + value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: - value = ir.Imm32(2.0f); + value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: - value = ir.Imm32(4.0f); + value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: - value = ir.Imm32(-0.5f); + value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: - value = ir.Imm32(-1.0f); + value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: - value = ir.Imm32(-2.0f); + value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: - value = ir.Imm32(-4.0f); + value = get_imm(-4.0f); break; case OperandField::VccLo: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccLo()); } else { value = ir.GetVccLo(); } break; case OperandField::VccHi: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccHi()); } else { value = ir.GetVccHi(); } break; + case OperandField::M0: + if constexpr (is_float) { + UNREACHABLE(); + } else { + return m0_value; + } default: UNREACHABLE(); } - if (operand.input_modifier.abs) { - value = ir.FPAbs(value); - } - if (operand.input_modifier.neg) { - value = ir.FPNeg(value); + if constexpr (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(value); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(value); + } + } else { + if (operand.input_modifier.abs) { + LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction"); + } + if (operand.input_modifier.neg) { + UNREACHABLE(); + } } return value; } +template IR::U32 Translator::GetSrc(const InstOperand&); +template IR::F32 Translator::GetSrc(const InstOperand&); + +template +T Translator::GetSrc64(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; + + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return ir.Imm64(std::bit_cast(value)); + } else { + return ir.Imm64(std::bit_cast(value)); + } + }; + + T value{}; + switch (operand.field) { + case OperandField::ScalarGPR: { + const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); + const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::VectorGPR: { + const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); + const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::ConstZero: + value = get_imm(0ULL); + break; + case OperandField::SignedConstIntPos: + value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1); + break; + case OperandField::SignedConstIntNeg: + value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1); + break; + case OperandField::LiteralConst: + value = get_imm(u64(operand.code)); + break; + case OperandField::ConstFloatPos_1_0: + value = get_imm(1.0); + break; + case OperandField::ConstFloatPos_0_5: + value = get_imm(0.5); + break; + case OperandField::ConstFloatPos_2_0: + value = get_imm(2.0); + break; + case OperandField::ConstFloatPos_4_0: + value = get_imm(4.0); + break; + case OperandField::ConstFloatNeg_0_5: + value = get_imm(-0.5); + break; + case OperandField::ConstFloatNeg_1_0: + value = get_imm(-1.0); + break; + case OperandField::ConstFloatNeg_2_0: + value = get_imm(-2.0); + break; + case OperandField::ConstFloatNeg_4_0: + value = get_imm(-4.0); + break; + case OperandField::VccLo: + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); + } + break; + case OperandField::VccHi: + default: + UNREACHABLE(); + } + + if constexpr (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(value); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(value); + } + } + return value; +} + +template IR::U64 Translator::GetSrc64(const InstOperand&); +template IR::F64 Translator::GetSrc64(const InstOperand&); + void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; if (operand.output_modifier.multiplier != 0.f) { @@ -190,6 +287,44 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { return ir.SetVccLo(result); case OperandField::VccHi: return ir.SetVccHi(result); + case OperandField::M0: + m0_value = result; + break; + default: + UNREACHABLE(); + } +} + +void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) { + IR::U64F64 value_untyped = value_raw; + + const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32; + if (is_float) { + if (operand.output_modifier.multiplier != 0.f) { + value_untyped = + ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier))); + } + if (operand.output_modifier.clamp) { + value_untyped = ir.FPSaturate(value_raw); + } + } + const IR::U64 value = + is_float ? ir.BitCast(IR::F64{value_untyped}) : IR::U64{value_untyped}; + + const IR::Value unpacked{ir.UnpackUint2x32(value)}; + const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)}; + const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)}; + switch (operand.field) { + case OperandField::ScalarGPR: + ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi); + return ir.SetScalarReg(IR::ScalarReg(operand.code), lo); + case OperandField::VectorGPR: + ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); + return ir.SetVectorReg(IR::VectorReg(operand.code), lo); + case OperandField::VccLo: + UNREACHABLE(); + case OperandField::VccHi: + UNREACHABLE(); case OperandField::M0: break; default: @@ -261,6 +396,7 @@ void Translator::EmitFetch(const GcnInst& inst) { .is_instance_data = true, }); instance_buf_handle = s32(info.buffers.size() - 1); + info.uses_step_rates = true; } const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); @@ -276,703 +412,84 @@ void Translator::EmitFetch(const GcnInst& inst) { } } -void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info) { +void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_BARRIER: + return S_BARRIER(); + case Opcode::S_TTRACEDATA: + LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); + return; + case Opcode::S_GETPC_B64: + return S_GETPC_B64(pc, inst); + case Opcode::S_WAITCNT: + case Opcode::S_NOP: + case Opcode::S_ENDPGM: + case Opcode::S_CBRANCH_EXECZ: + case Opcode::S_CBRANCH_SCC0: + case Opcode::S_CBRANCH_SCC1: + case Opcode::S_CBRANCH_VCCNZ: + case Opcode::S_CBRANCH_VCCZ: + case Opcode::S_BRANCH: + return; + default: + UNREACHABLE(); + } +} + +void Translator::LogMissingOpcode(const GcnInst& inst) { + const u32 opcode = u32(inst.opcode); + LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", + magic_enum::enum_name(inst.opcode), u32(inst.opcode), + magic_enum::enum_name(inst.category)); + info.translation_failed = true; +} + +void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info, + const Profile& profile) { if (inst_list.empty()) { return; } - Translator translator{block, info}; + Translator translator{block, info, profile}; for (const auto& inst : inst_list) { - block_base += inst.length; - switch (inst.opcode) { - case Opcode::S_MOVK_I32: - translator.S_MOVK(inst); - break; - case Opcode::S_MOV_B32: - translator.S_MOV(inst); - break; - case Opcode::S_MUL_I32: - translator.S_MUL_I32(inst); - break; - case Opcode::V_MAD_F32: - translator.V_MAD_F32(inst); - break; - case Opcode::V_MOV_B32: - translator.V_MOV(inst); - break; - case Opcode::V_MAC_F32: - translator.V_MAC_F32(inst); - break; - case Opcode::V_MUL_F32: - translator.V_MUL_F32(inst); - break; - case Opcode::V_AND_B32: - translator.V_AND_B32(inst); - break; - case Opcode::V_OR_B32: - translator.V_OR_B32(false, inst); - break; - case Opcode::V_XOR_B32: - translator.V_OR_B32(true, inst); - break; - case Opcode::V_LSHLREV_B32: - translator.V_LSHLREV_B32(inst); - break; - case Opcode::V_ADD_I32: - translator.V_ADD_I32(inst); - break; - case Opcode::V_CVT_F32_I32: - translator.V_CVT_F32_I32(inst); - break; - case Opcode::V_CVT_F32_U32: - translator.V_CVT_F32_U32(inst); - break; - case Opcode::V_RCP_F32: - translator.V_RCP_F32(inst); - break; - case Opcode::S_SWAPPC_B64: + pc += inst.length; + + // Special case for emitting fetch shader. + if (inst.opcode == Opcode::S_SWAPPC_B64) { ASSERT(info.stage == Stage::Vertex); translator.EmitFetch(inst); - break; - case Opcode::S_WAITCNT: - break; - case Opcode::S_LOAD_DWORDX4: - translator.S_LOAD_DWORD(4, inst); - break; - case Opcode::S_LOAD_DWORDX8: - translator.S_LOAD_DWORD(8, inst); - break; - case Opcode::S_LOAD_DWORDX16: - translator.S_LOAD_DWORD(16, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORD: - translator.S_BUFFER_LOAD_DWORD(1, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX2: - translator.S_BUFFER_LOAD_DWORD(2, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX4: - translator.S_BUFFER_LOAD_DWORD(4, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX8: - translator.S_BUFFER_LOAD_DWORD(8, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX16: - translator.S_BUFFER_LOAD_DWORD(16, inst); - break; - case Opcode::EXP: - translator.EXP(inst); - break; - case Opcode::V_INTERP_P2_F32: - translator.V_INTERP_P2_F32(inst); - break; - case Opcode::V_CVT_PKRTZ_F16_F32: - translator.V_CVT_PKRTZ_F16_F32(inst); - break; - case Opcode::V_CVT_F32_F16: - translator.V_CVT_F32_F16(inst); - break; - case Opcode::V_CVT_F32_UBYTE0: - translator.V_CVT_F32_UBYTE(0, inst); - break; - case Opcode::V_CVT_F32_UBYTE1: - translator.V_CVT_F32_UBYTE(1, inst); - break; - case Opcode::V_CVT_F32_UBYTE2: - translator.V_CVT_F32_UBYTE(2, inst); - break; - case Opcode::V_CVT_F32_UBYTE3: - translator.V_CVT_F32_UBYTE(3, inst); - break; - case Opcode::V_BFREV_B32: - translator.V_BFREV_B32(inst); - break; - case Opcode::V_LDEXP_F32: - translator.V_LDEXP_F32(inst); - break; - case Opcode::V_FRACT_F32: - translator.V_FRACT_F32(inst); - break; - case Opcode::V_ADD_F32: - translator.V_ADD_F32(inst); - break; - case Opcode::V_CVT_OFF_F32_I4: - translator.V_CVT_OFF_F32_I4(inst); - break; - case Opcode::V_MED3_F32: - translator.V_MED3_F32(inst); - break; - case Opcode::V_FLOOR_F32: - translator.V_FLOOR_F32(inst); - break; - case Opcode::V_SUB_F32: - translator.V_SUB_F32(inst); - break; - case Opcode::V_FMA_F32: - case Opcode::V_MADAK_F32: // Yes these can share the opcode - translator.V_FMA_F32(inst); - break; - case Opcode::IMAGE_SAMPLE_LZ_O: - case Opcode::IMAGE_SAMPLE_O: - case Opcode::IMAGE_SAMPLE_C: - case Opcode::IMAGE_SAMPLE_C_LZ: - case Opcode::IMAGE_SAMPLE_LZ: - case Opcode::IMAGE_SAMPLE: - case Opcode::IMAGE_SAMPLE_L: - case Opcode::IMAGE_SAMPLE_C_O: - case Opcode::IMAGE_SAMPLE_B: - case Opcode::IMAGE_SAMPLE_C_LZ_O: - translator.IMAGE_SAMPLE(inst); - break; - case Opcode::IMAGE_ATOMIC_ADD: - translator.IMAGE_ATOMIC(AtomicOp::Add, inst); - break; - case Opcode::IMAGE_ATOMIC_AND: - translator.IMAGE_ATOMIC(AtomicOp::And, inst); - break; - case Opcode::IMAGE_ATOMIC_OR: - translator.IMAGE_ATOMIC(AtomicOp::Or, inst); - break; - case Opcode::IMAGE_ATOMIC_XOR: - translator.IMAGE_ATOMIC(AtomicOp::Xor, inst); - break; - case Opcode::IMAGE_ATOMIC_UMAX: - translator.IMAGE_ATOMIC(AtomicOp::Umax, inst); - break; - case Opcode::IMAGE_ATOMIC_SMAX: - translator.IMAGE_ATOMIC(AtomicOp::Smax, inst); - break; - case Opcode::IMAGE_ATOMIC_UMIN: - translator.IMAGE_ATOMIC(AtomicOp::Umin, inst); - break; - case Opcode::IMAGE_ATOMIC_SMIN: - translator.IMAGE_ATOMIC(AtomicOp::Smin, inst); - break; - case Opcode::IMAGE_ATOMIC_INC: - translator.IMAGE_ATOMIC(AtomicOp::Inc, inst); - break; - case Opcode::IMAGE_ATOMIC_DEC: - translator.IMAGE_ATOMIC(AtomicOp::Dec, inst); - break; - case Opcode::IMAGE_GET_LOD: - translator.IMAGE_GET_LOD(inst); - break; - case Opcode::IMAGE_GATHER4_C: - case Opcode::IMAGE_GATHER4_LZ: - case Opcode::IMAGE_GATHER4_LZ_O: - translator.IMAGE_GATHER(inst); - break; - case Opcode::IMAGE_STORE: - translator.IMAGE_STORE(inst); - break; - case Opcode::IMAGE_LOAD_MIP: - translator.IMAGE_LOAD(true, inst); - break; - case Opcode::IMAGE_LOAD: - translator.IMAGE_LOAD(false, inst); - break; - case Opcode::V_CMP_GE_I32: - translator.V_CMP_U32(ConditionOp::GE, true, false, inst); - break; - case Opcode::V_CMP_EQ_I32: - translator.V_CMP_U32(ConditionOp::EQ, true, false, inst); - break; - case Opcode::V_CMP_LE_I32: - translator.V_CMP_U32(ConditionOp::LE, true, false, inst); - break; - case Opcode::V_CMP_NE_I32: - translator.V_CMP_U32(ConditionOp::LG, true, false, inst); - break; - case Opcode::V_CMP_NE_U32: - translator.V_CMP_U32(ConditionOp::LG, false, false, inst); - break; - case Opcode::V_CMP_EQ_U32: - translator.V_CMP_U32(ConditionOp::EQ, false, false, inst); - break; - case Opcode::V_CMP_F_U32: - translator.V_CMP_U32(ConditionOp::F, false, false, inst); - break; - case Opcode::V_CMP_LT_U32: - translator.V_CMP_U32(ConditionOp::LT, false, false, inst); - break; - case Opcode::V_CMP_GT_U32: - translator.V_CMP_U32(ConditionOp::GT, false, false, inst); - break; - case Opcode::V_CMP_GE_U32: - translator.V_CMP_U32(ConditionOp::GE, false, false, inst); - break; - case Opcode::V_CMP_TRU_U32: - translator.V_CMP_U32(ConditionOp::TRU, false, false, inst); - break; - case Opcode::V_CMP_NEQ_F32: - translator.V_CMP_F32(ConditionOp::LG, false, inst); - break; - case Opcode::V_CMP_F_F32: - translator.V_CMP_F32(ConditionOp::F, false, inst); - break; - case Opcode::V_CMP_LT_F32: - translator.V_CMP_F32(ConditionOp::LT, false, inst); - break; - case Opcode::V_CMP_EQ_F32: - translator.V_CMP_F32(ConditionOp::EQ, false, inst); - break; - case Opcode::V_CMP_LE_F32: - translator.V_CMP_F32(ConditionOp::LE, false, inst); - break; - case Opcode::V_CMP_GT_F32: - translator.V_CMP_F32(ConditionOp::GT, false, inst); - break; - case Opcode::V_CMP_LG_F32: - translator.V_CMP_F32(ConditionOp::LG, false, inst); - break; - case Opcode::V_CMP_GE_F32: - translator.V_CMP_F32(ConditionOp::GE, false, inst); - break; - case Opcode::V_CMP_NLE_F32: - translator.V_CMP_F32(ConditionOp::GT, false, inst); - break; - case Opcode::V_CMP_NLT_F32: - translator.V_CMP_F32(ConditionOp::GE, false, inst); - break; - case Opcode::V_CMP_NGT_F32: - translator.V_CMP_F32(ConditionOp::LE, false, inst); - break; - case Opcode::V_CMP_NGE_F32: - translator.V_CMP_F32(ConditionOp::LT, false, inst); - break; - case Opcode::S_CMP_LT_U32: - translator.S_CMP(ConditionOp::LT, false, inst); - break; - case Opcode::S_CMP_LE_U32: - translator.S_CMP(ConditionOp::LE, false, inst); - break; - case Opcode::S_CMP_LG_U32: - translator.S_CMP(ConditionOp::LG, false, inst); - break; - case Opcode::S_CMP_LT_I32: - translator.S_CMP(ConditionOp::LT, true, inst); - break; - case Opcode::S_CMP_LG_I32: - translator.S_CMP(ConditionOp::LG, true, inst); - break; - case Opcode::S_CMP_GT_I32: - translator.S_CMP(ConditionOp::GT, true, inst); - break; - case Opcode::S_CMP_GE_I32: - translator.S_CMP(ConditionOp::GE, true, inst); - break; - case Opcode::S_CMP_EQ_I32: - translator.S_CMP(ConditionOp::EQ, true, inst); - break; - case Opcode::S_CMP_EQ_U32: - translator.S_CMP(ConditionOp::EQ, false, inst); - break; - case Opcode::S_LSHL_B32: - translator.S_LSHL_B32(inst); - break; - case Opcode::V_CNDMASK_B32: - translator.V_CNDMASK_B32(inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_X: - translator.BUFFER_LOAD_FORMAT(1, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XY: - translator.BUFFER_LOAD_FORMAT(2, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XYZ: - translator.BUFFER_LOAD_FORMAT(3, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XYZW: - translator.BUFFER_LOAD_FORMAT(4, true, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_X: - translator.BUFFER_LOAD_FORMAT(1, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XY: - translator.BUFFER_LOAD_FORMAT(2, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XYZ: - translator.BUFFER_LOAD_FORMAT(3, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XYZW: - translator.BUFFER_LOAD_FORMAT(4, false, true, inst); - break; - case Opcode::BUFFER_LOAD_DWORD: - translator.BUFFER_LOAD_FORMAT(1, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX2: - translator.BUFFER_LOAD_FORMAT(2, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX3: - translator.BUFFER_LOAD_FORMAT(3, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX4: - translator.BUFFER_LOAD_FORMAT(4, false, false, inst); - break; - case Opcode::BUFFER_STORE_FORMAT_X: - case Opcode::BUFFER_STORE_DWORD: - translator.BUFFER_STORE_FORMAT(1, false, inst); - break; - case Opcode::BUFFER_STORE_DWORDX3: - translator.BUFFER_STORE_FORMAT(3, false, inst); - break; - case Opcode::BUFFER_STORE_FORMAT_XYZW: - case Opcode::BUFFER_STORE_DWORDX4: - translator.BUFFER_STORE_FORMAT(4, false, inst); - break; - case Opcode::V_MAX_F32: - translator.V_MAX_F32(inst); - break; - case Opcode::V_MAX_I32: - translator.V_MAX_U32(true, inst); - break; - case Opcode::V_MAX_U32: - translator.V_MAX_U32(false, inst); - break; - case Opcode::V_NOT_B32: - translator.V_NOT_B32(inst); - break; - case Opcode::V_RSQ_F32: - translator.V_RSQ_F32(inst); - break; - case Opcode::S_ANDN2_B64: - translator.S_AND_B64(NegateMode::Src1, inst); - break; - case Opcode::S_ORN2_B64: - translator.S_OR_B64(NegateMode::Src1, false, inst); - break; - case Opcode::V_SIN_F32: - translator.V_SIN_F32(inst); - break; - case Opcode::V_COS_F32: - translator.V_COS_F32(inst); - break; - case Opcode::V_LOG_F32: - translator.V_LOG_F32(inst); - break; - case Opcode::V_EXP_F32: - translator.V_EXP_F32(inst); - break; - case Opcode::V_SQRT_F32: - translator.V_SQRT_F32(inst); - break; - case Opcode::V_MIN_F32: - translator.V_MIN_F32(inst); - break; - case Opcode::V_MIN_I32: - translator.V_MIN_I32(inst); - break; - case Opcode::V_MIN3_F32: - translator.V_MIN3_F32(inst); - break; - case Opcode::V_MIN_LEGACY_F32: - translator.V_MIN_F32(inst, true); - break; - case Opcode::V_MADMK_F32: - translator.V_MADMK_F32(inst); - break; - case Opcode::V_CUBEMA_F32: - translator.V_CUBEMA_F32(inst); - break; - case Opcode::V_CUBESC_F32: - translator.V_CUBESC_F32(inst); - break; - case Opcode::V_CUBETC_F32: - translator.V_CUBETC_F32(inst); - break; - case Opcode::V_CUBEID_F32: - translator.V_CUBEID_F32(inst); - break; - case Opcode::V_CVT_U32_F32: - translator.V_CVT_U32_F32(inst); - break; - case Opcode::V_CVT_I32_F32: - translator.V_CVT_I32_F32(inst); - break; - case Opcode::V_CVT_FLR_I32_F32: - translator.V_CVT_FLR_I32_F32(inst); - break; - case Opcode::V_SUBREV_F32: - translator.V_SUBREV_F32(inst); - break; - case Opcode::S_AND_SAVEEXEC_B64: - translator.S_AND_SAVEEXEC_B64(inst); - break; - case Opcode::S_MOV_B64: - translator.S_MOV_B64(inst); - break; - case Opcode::V_SUBREV_I32: - translator.V_SUBREV_I32(inst); - break; + continue; + } - case Opcode::V_CMPX_F_F32: - translator.V_CMP_F32(ConditionOp::F, true, inst); + // Emit instructions for each category. + switch (inst.category) { + case InstCategory::DataShare: + translator.EmitDataShare(inst); break; - case Opcode::V_CMPX_LT_F32: - translator.V_CMP_F32(ConditionOp::LT, true, inst); + case InstCategory::VectorInterpolation: + translator.EmitVectorInterpolation(inst); break; - case Opcode::V_CMPX_EQ_F32: - translator.V_CMP_F32(ConditionOp::EQ, true, inst); + case InstCategory::ScalarMemory: + translator.EmitScalarMemory(inst); break; - case Opcode::V_CMPX_LE_F32: - translator.V_CMP_F32(ConditionOp::LE, true, inst); + case InstCategory::VectorMemory: + translator.EmitVectorMemory(inst); break; - case Opcode::V_CMPX_GT_F32: - translator.V_CMP_F32(ConditionOp::GT, true, inst); + case InstCategory::Export: + translator.EmitExport(inst); break; - case Opcode::V_CMPX_LG_F32: - translator.V_CMP_F32(ConditionOp::LG, true, inst); + case InstCategory::FlowControl: + translator.EmitFlowControl(pc, inst); break; - case Opcode::V_CMPX_GE_F32: - translator.V_CMP_F32(ConditionOp::GE, true, inst); + case InstCategory::ScalarALU: + translator.EmitScalarAlu(inst); break; - case Opcode::V_CMPX_NGE_F32: - translator.V_CMP_F32(ConditionOp::LT, true, inst); + case InstCategory::VectorALU: + translator.EmitVectorAlu(inst); break; - case Opcode::V_CMPX_NLG_F32: - translator.V_CMP_F32(ConditionOp::EQ, true, inst); - break; - case Opcode::V_CMPX_NGT_F32: - translator.V_CMP_F32(ConditionOp::LE, true, inst); - break; - case Opcode::V_CMPX_NLE_F32: - translator.V_CMP_F32(ConditionOp::GT, true, inst); - break; - case Opcode::V_CMPX_NEQ_F32: - translator.V_CMP_F32(ConditionOp::LG, true, inst); - break; - case Opcode::V_CMPX_NLT_F32: - translator.V_CMP_F32(ConditionOp::GE, true, inst); - break; - case Opcode::V_CMPX_TRU_F32: - translator.V_CMP_F32(ConditionOp::TRU, true, inst); - break; - case Opcode::V_CMP_LE_U32: - translator.V_CMP_U32(ConditionOp::LE, false, false, inst); - break; - case Opcode::V_CMP_GT_I32: - translator.V_CMP_U32(ConditionOp::GT, true, false, inst); - break; - case Opcode::V_CMP_LT_I32: - translator.V_CMP_U32(ConditionOp::LT, true, false, inst); - break; - case Opcode::V_CMPX_LT_I32: - translator.V_CMP_U32(ConditionOp::LT, true, true, inst); - break; - case Opcode::V_CMPX_F_U32: - translator.V_CMP_U32(ConditionOp::F, false, true, inst); - break; - case Opcode::V_CMPX_LT_U32: - translator.V_CMP_U32(ConditionOp::LT, false, true, inst); - break; - case Opcode::V_CMPX_EQ_U32: - translator.V_CMP_U32(ConditionOp::EQ, false, true, inst); - break; - case Opcode::V_CMPX_LE_U32: - translator.V_CMP_U32(ConditionOp::LE, false, true, inst); - break; - case Opcode::V_CMPX_GT_U32: - translator.V_CMP_U32(ConditionOp::GT, false, true, inst); - break; - case Opcode::V_CMPX_NE_U32: - translator.V_CMP_U32(ConditionOp::LG, false, true, inst); - break; - case Opcode::V_CMPX_GE_U32: - translator.V_CMP_U32(ConditionOp::GE, false, true, inst); - break; - case Opcode::V_CMPX_TRU_U32: - translator.V_CMP_U32(ConditionOp::TRU, false, true, inst); - break; - case Opcode::S_OR_B64: - translator.S_OR_B64(NegateMode::None, false, inst); - break; - case Opcode::S_NOR_B64: - translator.S_OR_B64(NegateMode::Result, false, inst); - break; - case Opcode::S_XOR_B64: - translator.S_OR_B64(NegateMode::None, true, inst); - break; - case Opcode::S_AND_B64: - translator.S_AND_B64(NegateMode::None, inst); - break; - case Opcode::S_NOT_B64: - translator.S_NOT_B64(inst); - break; - case Opcode::S_NAND_B64: - translator.S_AND_B64(NegateMode::Result, inst); - break; - case Opcode::V_LSHRREV_B32: - translator.V_LSHRREV_B32(inst); - break; - case Opcode::S_ADD_I32: - translator.S_ADD_I32(inst); - break; - case Opcode::V_MUL_HI_U32: - translator.V_MUL_HI_U32(false, inst); - break; - case Opcode::V_MUL_LO_I32: - translator.V_MUL_LO_U32(inst); - break; - case Opcode::V_SAD_U32: - translator.V_SAD_U32(inst); - break; - case Opcode::V_BFE_U32: - translator.V_BFE_U32(false, inst); - break; - case Opcode::V_BFE_I32: - translator.V_BFE_U32(true, inst); - break; - case Opcode::V_MAD_I32_I24: - translator.V_MAD_I32_I24(inst); - break; - case Opcode::V_MUL_I32_I24: - case Opcode::V_MUL_U32_U24: - translator.V_MUL_I32_I24(inst); - break; - case Opcode::V_SUB_I32: - translator.V_SUB_I32(inst); - break; - case Opcode::V_LSHR_B32: - translator.V_LSHR_B32(inst); - break; - case Opcode::V_ASHRREV_I32: - translator.V_ASHRREV_I32(inst); - break; - case Opcode::V_MAD_U32_U24: - translator.V_MAD_U32_U24(inst); - break; - case Opcode::S_AND_B32: - translator.S_AND_B32(inst); - break; - case Opcode::S_ASHR_I32: - translator.S_ASHR_I32(inst); - break; - case Opcode::S_OR_B32: - translator.S_OR_B32(inst); - break; - case Opcode::S_LSHR_B32: - translator.S_LSHR_B32(inst); - break; - case Opcode::S_CSELECT_B32: - translator.S_CSELECT_B32(inst); - break; - case Opcode::S_CSELECT_B64: - translator.S_CSELECT_B64(inst); - break; - case Opcode::S_BFE_U32: - translator.S_BFE_U32(inst); - break; - case Opcode::V_RNDNE_F32: - translator.V_RNDNE_F32(inst); - break; - case Opcode::V_BCNT_U32_B32: - translator.V_BCNT_U32_B32(inst); - break; - case Opcode::V_MAX3_F32: - translator.V_MAX3_F32(inst); - break; - case Opcode::DS_SWIZZLE_B32: - translator.DS_SWIZZLE_B32(inst); - break; - case Opcode::V_MUL_LO_U32: - translator.V_MUL_LO_U32(inst); - break; - case Opcode::S_BFM_B32: - translator.S_BFM_B32(inst); - break; - case Opcode::V_MIN_U32: - translator.V_MIN_U32(inst); - break; - case Opcode::V_CMP_NE_U64: - translator.V_CMP_NE_U64(inst); - break; - case Opcode::V_CMP_CLASS_F32: - translator.V_CMP_CLASS_F32(inst); - break; - case Opcode::V_TRUNC_F32: - translator.V_TRUNC_F32(inst); - break; - case Opcode::V_CEIL_F32: - translator.V_CEIL_F32(inst); - break; - case Opcode::V_BFI_B32: - translator.V_BFI_B32(inst); - break; - case Opcode::S_BREV_B32: - translator.S_BREV_B32(inst); - break; - case Opcode::S_ADD_U32: - translator.S_ADD_U32(inst); - break; - case Opcode::S_ADDC_U32: - translator.S_ADDC_U32(inst); - break; - case Opcode::S_SUB_U32: - case Opcode::S_SUB_I32: - translator.S_SUB_U32(inst); - break; - // TODO: Separate implementation for legacy variants. - case Opcode::V_MUL_LEGACY_F32: - translator.V_MUL_F32(inst); - break; - case Opcode::V_MAC_LEGACY_F32: - translator.V_MAC_F32(inst); - break; - case Opcode::V_MAD_LEGACY_F32: - translator.V_MAD_F32(inst); - break; - case Opcode::V_MAX_LEGACY_F32: - translator.V_MAX_F32(inst, true); - break; - case Opcode::V_RSQ_LEGACY_F32: - case Opcode::V_RSQ_CLAMP_F32: - translator.V_RSQ_F32(inst); - break; - case Opcode::V_RCP_IFLAG_F32: - translator.V_RCP_F32(inst); - break; - case Opcode::IMAGE_GET_RESINFO: - translator.IMAGE_GET_RESINFO(inst); - break; - case Opcode::S_BARRIER: - translator.S_BARRIER(); - break; - case Opcode::S_TTRACEDATA: - LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); - break; - case Opcode::DS_READ_B32: - translator.DS_READ(32, false, false, inst); - break; - case Opcode::DS_READ2_B32: - translator.DS_READ(32, false, true, inst); - break; - case Opcode::DS_WRITE_B32: - translator.DS_WRITE(32, false, false, inst); - break; - case Opcode::DS_WRITE2_B32: - translator.DS_WRITE(32, false, true, inst); - break; - case Opcode::V_READFIRSTLANE_B32: - translator.V_READFIRSTLANE_B32(inst); - break; - case Opcode::S_GETPC_B64: - translator.S_GETPC_B64(block_base, inst); - break; - case Opcode::S_NOP: - case Opcode::S_CBRANCH_EXECZ: - case Opcode::S_CBRANCH_SCC0: - case Opcode::S_CBRANCH_SCC1: - case Opcode::S_CBRANCH_VCCNZ: - case Opcode::S_CBRANCH_VCCZ: - case Opcode::S_BRANCH: - case Opcode::S_WQM_B64: - case Opcode::V_INTERP_P1_F32: - case Opcode::S_ENDPGM: + case InstCategory::DebugProfile: break; default: - const u32 opcode = u32(inst.opcode); - LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})", - magic_enum::enum_name(inst.opcode), opcode); - info.translation_failed = true; + UNREACHABLE(); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2aa6f7124..9ebcb1163 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -11,7 +11,8 @@ namespace Shader { struct Info; -} +struct Profile; +} // namespace Shader namespace Shader::Gcn { @@ -24,6 +25,7 @@ enum class ConditionOp : u32 { LT, LE, TRU, + U, }; enum class AtomicOp : u32 { @@ -53,10 +55,19 @@ enum class NegateMode : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Info& info); + explicit Translator(IR::Block* block_, Info& info, const Profile& profile); + // Instruction categories void EmitPrologue(); void EmitFetch(const GcnInst& inst); + void EmitDataShare(const GcnInst& inst); + void EmitVectorInterpolation(const GcnInst& inst); + void EmitScalarMemory(const GcnInst& inst); + void EmitVectorMemory(const GcnInst& inst); + void EmitExport(const GcnInst& inst); + void EmitFlowControl(u32 pc, const GcnInst& inst); + void EmitScalarAlu(const GcnInst& inst); + void EmitVectorAlu(const GcnInst& inst); // Scalar ALU void S_MOVK(const GcnInst& inst); @@ -83,6 +94,10 @@ public: void S_SUB_U32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst); + void S_MULK_I32(const GcnInst& inst); + void S_ADDK_I32(const GcnInst& inst); + void S_MAX_U32(const GcnInst& inst); + void S_MIN_U32(const GcnInst& inst); // Scalar Memory void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); @@ -94,12 +109,15 @@ public: void V_MAC_F32(const GcnInst& inst); void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); void V_CVT_F32_F16(const GcnInst& inst); + void V_CVT_F16_F32(const GcnInst& inst); void V_MUL_F32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst); void V_OR_B32(bool is_xor, const GcnInst& inst); void V_AND_B32(const GcnInst& inst); void V_LSHLREV_B32(const GcnInst& inst); + void V_LSHL_B32(const GcnInst& inst); void V_ADD_I32(const GcnInst& inst); + void V_ADDC_U32(const GcnInst& inst); void V_CVT_F32_I32(const GcnInst& inst); void V_CVT_F32_U32(const GcnInst& inst); void V_MAD_F32(const GcnInst& inst); @@ -107,6 +125,7 @@ public: void V_ADD_F32(const GcnInst& inst); void V_CVT_OFF_F32_I4(const GcnInst& inst); void V_MED3_F32(const GcnInst& inst); + void V_MED3_I32(const GcnInst& inst); void V_FLOOR_F32(const GcnInst& inst); void V_SUB_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst); @@ -121,6 +140,7 @@ public: void V_SQRT_F32(const GcnInst& inst); void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); void V_MIN3_F32(const GcnInst& inst); + void V_MIN3_I32(const GcnInst& inst); void V_MADMK_F32(const GcnInst& inst); void V_CUBEMA_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); @@ -129,6 +149,7 @@ public: void V_CVT_U32_F32(const GcnInst& inst); void V_SUBREV_F32(const GcnInst& inst); void V_SUBREV_I32(const GcnInst& inst); + void V_MAD_U64_U32(const GcnInst& inst); void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); void V_LSHRREV_B32(const GcnInst& inst); void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); @@ -139,11 +160,13 @@ public: void V_SUB_I32(const GcnInst& inst); void V_LSHR_B32(const GcnInst& inst); void V_ASHRREV_I32(const GcnInst& inst); + void V_ASHR_I32(const GcnInst& inst); void V_MAD_U32_U24(const GcnInst& inst); void V_RNDNE_F32(const GcnInst& inst); void V_BCNT_U32_B32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst); void V_MAX3_F32(const GcnInst& inst); + void V_MAX3_U32(const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst); @@ -158,19 +181,24 @@ public: void V_LDEXP_F32(const GcnInst& inst); void V_CVT_FLR_I32_F32(const GcnInst& inst); void V_CMP_CLASS_F32(const GcnInst& inst); + void V_FFBL_B32(const GcnInst& inst); + void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); - void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); + void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); + void V_INTERP_MOV_F32(const GcnInst& inst); // Data share void DS_SWIZZLE_B32(const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void V_READFIRSTLANE_B32(const GcnInst& inst); + void V_READLANE_B32(const GcnInst& inst); + void V_WRITELANE_B32(const GcnInst& inst); void S_BARRIER(); // MIMG @@ -182,19 +210,25 @@ public: void IMAGE_GET_LOD(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); - // Export - void EXP(const GcnInst& inst); - private: - IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false); + template + [[nodiscard]] T GetSrc(const InstOperand& operand); + template + [[nodiscard]] T GetSrc64(const InstOperand& operand); void SetDst(const InstOperand& operand, const IR::U32F32& value); + void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); + + void LogMissingOpcode(const GcnInst& inst); private: IR::IREmitter ir; Info& info; - static std::array exec_contexts; + const Profile& profile; + IR::U32 m0_value; + bool opcode_missing = false; }; -void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info); +void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, + const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index ca648f882..1bbc3c162 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -5,8 +5,313 @@ namespace Shader::Gcn { +void Translator::EmitVectorAlu(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::V_LSHLREV_B32: + return V_LSHLREV_B32(inst); + case Opcode::V_LSHL_B32: + return V_LSHL_B32(inst); + case Opcode::V_BFREV_B32: + return V_BFREV_B32(inst); + case Opcode::V_BFE_U32: + return V_BFE_U32(false, inst); + case Opcode::V_BFE_I32: + return V_BFE_U32(true, inst); + case Opcode::V_BFI_B32: + return V_BFI_B32(inst); + case Opcode::V_LSHR_B32: + return V_LSHR_B32(inst); + case Opcode::V_ASHRREV_I32: + return V_ASHRREV_I32(inst); + case Opcode::V_ASHR_I32: + return V_ASHR_I32(inst); + case Opcode::V_LSHRREV_B32: + return V_LSHRREV_B32(inst); + case Opcode::V_NOT_B32: + return V_NOT_B32(inst); + case Opcode::V_AND_B32: + return V_AND_B32(inst); + case Opcode::V_OR_B32: + return V_OR_B32(false, inst); + case Opcode::V_XOR_B32: + return V_OR_B32(true, inst); + case Opcode::V_FFBL_B32: + return V_FFBL_B32(inst); + + case Opcode::V_MOV_B32: + return V_MOV(inst); + case Opcode::V_ADD_I32: + return V_ADD_I32(inst); + case Opcode::V_ADDC_U32: + return V_ADDC_U32(inst); + case Opcode::V_CVT_F32_I32: + return V_CVT_F32_I32(inst); + case Opcode::V_CVT_F32_U32: + return V_CVT_F32_U32(inst); + case Opcode::V_CVT_PKRTZ_F16_F32: + return V_CVT_PKRTZ_F16_F32(inst); + case Opcode::V_CVT_F32_F16: + return V_CVT_F32_F16(inst); + case Opcode::V_CVT_F16_F32: + return V_CVT_F16_F32(inst); + case Opcode::V_CVT_F32_UBYTE0: + return V_CVT_F32_UBYTE(0, inst); + case Opcode::V_CVT_F32_UBYTE1: + return V_CVT_F32_UBYTE(1, inst); + case Opcode::V_CVT_F32_UBYTE2: + return V_CVT_F32_UBYTE(2, inst); + case Opcode::V_CVT_F32_UBYTE3: + return V_CVT_F32_UBYTE(3, inst); + case Opcode::V_CVT_OFF_F32_I4: + return V_CVT_OFF_F32_I4(inst); + case Opcode::V_MAD_U64_U32: + return V_MAD_U64_U32(inst); + case Opcode::V_CMP_GE_I32: + return V_CMP_U32(ConditionOp::GE, true, false, inst); + case Opcode::V_CMP_EQ_I32: + return V_CMP_U32(ConditionOp::EQ, true, false, inst); + case Opcode::V_CMP_LE_I32: + return V_CMP_U32(ConditionOp::LE, true, false, inst); + case Opcode::V_CMP_NE_I32: + return V_CMP_U32(ConditionOp::LG, true, false, inst); + case Opcode::V_CMP_NE_U32: + return V_CMP_U32(ConditionOp::LG, false, false, inst); + case Opcode::V_CMP_EQ_U32: + return V_CMP_U32(ConditionOp::EQ, false, false, inst); + case Opcode::V_CMP_F_U32: + return V_CMP_U32(ConditionOp::F, false, false, inst); + case Opcode::V_CMP_LT_U32: + return V_CMP_U32(ConditionOp::LT, false, false, inst); + case Opcode::V_CMP_GT_U32: + return V_CMP_U32(ConditionOp::GT, false, false, inst); + case Opcode::V_CMP_GE_U32: + return V_CMP_U32(ConditionOp::GE, false, false, inst); + case Opcode::V_CMP_TRU_U32: + return V_CMP_U32(ConditionOp::TRU, false, false, inst); + case Opcode::V_CMP_NEQ_F32: + return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_CMP_F_F32: + return V_CMP_F32(ConditionOp::F, false, inst); + case Opcode::V_CMP_LT_F32: + return V_CMP_F32(ConditionOp::LT, false, inst); + case Opcode::V_CMP_EQ_F32: + return V_CMP_F32(ConditionOp::EQ, false, inst); + case Opcode::V_CMP_LE_F32: + return V_CMP_F32(ConditionOp::LE, false, inst); + case Opcode::V_CMP_GT_F32: + return V_CMP_F32(ConditionOp::GT, false, inst); + case Opcode::V_CMP_LG_F32: + return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_CMP_GE_F32: + return V_CMP_F32(ConditionOp::GE, false, inst); + case Opcode::V_CMP_NLE_F32: + return V_CMP_F32(ConditionOp::GT, false, inst); + case Opcode::V_CMP_NLT_F32: + return V_CMP_F32(ConditionOp::GE, false, inst); + case Opcode::V_CMP_NGT_F32: + return V_CMP_F32(ConditionOp::LE, false, inst); + case Opcode::V_CMP_NGE_F32: + return V_CMP_F32(ConditionOp::LT, false, inst); + case Opcode::V_CMP_U_F32: + return V_CMP_F32(ConditionOp::U, false, inst); + case Opcode::V_CNDMASK_B32: + return V_CNDMASK_B32(inst); + case Opcode::V_MAX_I32: + return V_MAX_U32(true, inst); + case Opcode::V_MAX_U32: + return V_MAX_U32(false, inst); + case Opcode::V_MIN_I32: + return V_MIN_I32(inst); + case Opcode::V_CUBEMA_F32: + return V_CUBEMA_F32(inst); + case Opcode::V_CUBESC_F32: + return V_CUBESC_F32(inst); + case Opcode::V_CUBETC_F32: + return V_CUBETC_F32(inst); + case Opcode::V_CUBEID_F32: + return V_CUBEID_F32(inst); + case Opcode::V_CVT_U32_F32: + return V_CVT_U32_F32(inst); + case Opcode::V_CVT_I32_F32: + return V_CVT_I32_F32(inst); + case Opcode::V_CVT_FLR_I32_F32: + return V_CVT_FLR_I32_F32(inst); + case Opcode::V_SUBREV_I32: + return V_SUBREV_I32(inst); + case Opcode::V_MUL_HI_U32: + return V_MUL_HI_U32(false, inst); + case Opcode::V_MUL_LO_I32: + return V_MUL_LO_U32(inst); + case Opcode::V_SAD_U32: + return V_SAD_U32(inst); + case Opcode::V_SUB_I32: + return V_SUB_I32(inst); + case Opcode::V_MAD_I32_I24: + return V_MAD_I32_I24(inst); + case Opcode::V_MUL_I32_I24: + case Opcode::V_MUL_U32_U24: + return V_MUL_I32_I24(inst); + case Opcode::V_MAD_U32_U24: + return V_MAD_U32_U24(inst); + case Opcode::V_BCNT_U32_B32: + return V_BCNT_U32_B32(inst); + case Opcode::V_MUL_LO_U32: + return V_MUL_LO_U32(inst); + case Opcode::V_MIN_U32: + return V_MIN_U32(inst); + case Opcode::V_CMP_NE_U64: + return V_CMP_NE_U64(inst); + case Opcode::V_READFIRSTLANE_B32: + return V_READFIRSTLANE_B32(inst); + case Opcode::V_READLANE_B32: + return V_READLANE_B32(inst); + case Opcode::V_WRITELANE_B32: + return V_WRITELANE_B32(inst); + + case Opcode::V_MAD_F32: + return V_MAD_F32(inst); + case Opcode::V_MAC_F32: + return V_MAC_F32(inst); + case Opcode::V_MUL_F32: + return V_MUL_F32(inst); + case Opcode::V_RCP_F32: + return V_RCP_F32(inst); + case Opcode::V_LDEXP_F32: + return V_LDEXP_F32(inst); + case Opcode::V_FRACT_F32: + return V_FRACT_F32(inst); + case Opcode::V_ADD_F32: + return V_ADD_F32(inst); + case Opcode::V_MED3_F32: + return V_MED3_F32(inst); + case Opcode::V_MED3_I32: + return V_MED3_I32(inst); + case Opcode::V_FLOOR_F32: + return V_FLOOR_F32(inst); + case Opcode::V_SUB_F32: + return V_SUB_F32(inst); + case Opcode::V_FMA_F32: + case Opcode::V_MADAK_F32: + return V_FMA_F32(inst); + case Opcode::V_MAX_F32: + return V_MAX_F32(inst); + case Opcode::V_RSQ_F32: + return V_RSQ_F32(inst); + case Opcode::V_SIN_F32: + return V_SIN_F32(inst); + case Opcode::V_COS_F32: + return V_COS_F32(inst); + case Opcode::V_LOG_F32: + return V_LOG_F32(inst); + case Opcode::V_EXP_F32: + return V_EXP_F32(inst); + case Opcode::V_SQRT_F32: + return V_SQRT_F32(inst); + case Opcode::V_MIN_F32: + return V_MIN_F32(inst, false); + case Opcode::V_MIN3_F32: + return V_MIN3_F32(inst); + case Opcode::V_MIN3_I32: + return V_MIN3_I32(inst); + case Opcode::V_MIN_LEGACY_F32: + return V_MIN_F32(inst, true); + case Opcode::V_MADMK_F32: + return V_MADMK_F32(inst); + case Opcode::V_SUBREV_F32: + return V_SUBREV_F32(inst); + case Opcode::V_RNDNE_F32: + return V_RNDNE_F32(inst); + case Opcode::V_MAX3_F32: + return V_MAX3_F32(inst); + case Opcode::V_MAX3_U32: + return V_MAX3_U32(inst); + case Opcode::V_TRUNC_F32: + return V_TRUNC_F32(inst); + case Opcode::V_CEIL_F32: + return V_CEIL_F32(inst); + case Opcode::V_MUL_LEGACY_F32: + return V_MUL_F32(inst); + case Opcode::V_MAC_LEGACY_F32: + return V_MAC_F32(inst); + case Opcode::V_MAD_LEGACY_F32: + return V_MAD_F32(inst); + case Opcode::V_MAX_LEGACY_F32: + return V_MAX_F32(inst, true); + case Opcode::V_RSQ_LEGACY_F32: + case Opcode::V_RSQ_CLAMP_F32: + return V_RSQ_F32(inst); + case Opcode::V_RCP_IFLAG_F32: + return V_RCP_F32(inst); + + case Opcode::V_CMPX_F_F32: + return V_CMP_F32(ConditionOp::F, true, inst); + case Opcode::V_CMPX_LT_F32: + return V_CMP_F32(ConditionOp::LT, true, inst); + case Opcode::V_CMPX_EQ_F32: + return V_CMP_F32(ConditionOp::EQ, true, inst); + case Opcode::V_CMPX_LE_F32: + return V_CMP_F32(ConditionOp::LE, true, inst); + case Opcode::V_CMPX_GT_F32: + return V_CMP_F32(ConditionOp::GT, true, inst); + case Opcode::V_CMPX_LG_F32: + return V_CMP_F32(ConditionOp::LG, true, inst); + case Opcode::V_CMPX_GE_F32: + return V_CMP_F32(ConditionOp::GE, true, inst); + case Opcode::V_CMPX_NGE_F32: + return V_CMP_F32(ConditionOp::LT, true, inst); + case Opcode::V_CMPX_NLG_F32: + return V_CMP_F32(ConditionOp::EQ, true, inst); + case Opcode::V_CMPX_NGT_F32: + return V_CMP_F32(ConditionOp::LE, true, inst); + case Opcode::V_CMPX_NLE_F32: + return V_CMP_F32(ConditionOp::GT, true, inst); + case Opcode::V_CMPX_NEQ_F32: + return V_CMP_F32(ConditionOp::LG, true, inst); + case Opcode::V_CMPX_NLT_F32: + return V_CMP_F32(ConditionOp::GE, true, inst); + case Opcode::V_CMPX_TRU_F32: + return V_CMP_F32(ConditionOp::TRU, true, inst); + case Opcode::V_CMP_CLASS_F32: + return V_CMP_CLASS_F32(inst); + + case Opcode::V_CMP_LE_U32: + return V_CMP_U32(ConditionOp::LE, false, false, inst); + case Opcode::V_CMP_GT_I32: + return V_CMP_U32(ConditionOp::GT, true, false, inst); + case Opcode::V_CMP_LT_I32: + return V_CMP_U32(ConditionOp::LT, true, false, inst); + case Opcode::V_CMPX_LT_I32: + return V_CMP_U32(ConditionOp::LT, true, true, inst); + case Opcode::V_CMPX_F_U32: + return V_CMP_U32(ConditionOp::F, false, true, inst); + case Opcode::V_CMPX_LT_U32: + return V_CMP_U32(ConditionOp::LT, false, true, inst); + case Opcode::V_CMPX_EQ_U32: + return V_CMP_U32(ConditionOp::EQ, false, true, inst); + case Opcode::V_CMPX_LE_U32: + return V_CMP_U32(ConditionOp::LE, false, true, inst); + case Opcode::V_CMPX_GT_U32: + return V_CMP_U32(ConditionOp::GT, false, true, inst); + case Opcode::V_CMPX_NE_U32: + return V_CMP_U32(ConditionOp::LG, false, true, inst); + case Opcode::V_CMPX_GE_U32: + return V_CMP_U32(ConditionOp::GE, false, true, inst); + case Opcode::V_CMPX_TRU_U32: + return V_CMP_U32(ConditionOp::TRU, false, true, inst); + case Opcode::V_CMPX_LG_I32: + return V_CMP_U32(ConditionOp::LG, true, true, inst); + + case Opcode::V_MBCNT_LO_U32_B32: + return V_MBCNT_U32_B32(true, inst); + case Opcode::V_MBCNT_HI_U32_B32: + return V_MBCNT_U32_B32(false, inst); + default: + LogMissingOpcode(inst); + } +} + void Translator::V_MOV(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_SAD(const GcnInst& inst) { @@ -15,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) { } void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true), - GetSrc(inst.dst[0], true))); + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), + GetSrc(inst.dst[0]))); } void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value vec_f32 = - ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)); + ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } @@ -32,8 +337,14 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast(src0l))); } +void Translator::V_CVT_F16_F32(const GcnInst& inst) { + const IR::F32 src0 = GetSrc(inst.src[0]); + const IR::F16 src0fp16 = ir.FPConvert(16, src0); + SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast(src0fp16))); +} + void Translator::V_MUL_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } void Translator::V_CNDMASK_B32(const GcnInst& inst) { @@ -42,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR ? ir.GetThreadBitScalarReg(flag_reg) : ir.GetVcc(); - - // We can treat the instruction as integer most of the time, but when a source is - // a floating point constant we will force the other as float for better readability - // The other operand is also higly likely to be float as well. - const auto is_float_const = [](OperandField field) { - return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0; - }; - const bool has_flt_source = - is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field); - IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); - IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source); - if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) { - src1 = ir.BitCast(src1); - } - if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) { - src0 = ir.BitCast(src0); - } - const IR::Value result = ir.Select(flag, src1, src0); + const IR::Value result = + ir.Select(flag, GetSrc(inst.src[1]), GetSrc(inst.src[0])); ir.SetVectorReg(dst_reg, IR::U32F32{result}); } @@ -67,7 +362,8 @@ void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1)); + ir.SetVectorReg(dst_reg, + is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); } void Translator::V_AND_B32(const GcnInst& inst) { @@ -84,6 +380,12 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } +void Translator::V_LSHL_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); +} + void Translator::V_ADD_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; @@ -92,6 +394,30 @@ void Translator::V_ADD_I32(const GcnInst& inst) { // TODO: Carry } +void Translator::V_ADDC_U32(const GcnInst& inst) { + + const auto src0 = GetSrc(inst.src[0]); + const auto src1 = GetSrc(inst.src[1]); + + IR::U32 scarry; + if (inst.src_count == 3) { // VOP3 + IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))}; + scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))}; + } else { // VOP2 + scarry = ir.GetVccLo(); + } + + const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry); + + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, result); + + const IR::U1 less_src0 = ir.ILessThan(result, src0, false); + const IR::U1 less_src1 = ir.ILessThan(result, src1, false); + const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); + ir.SetVcc(did_overflow); +} + void Translator::V_CVT_F32_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; @@ -105,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) { } void Translator::V_MAD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_FRACT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.Fract(src0)); } void Translator::V_ADD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPAdd(src0, src1)); } @@ -133,40 +459,48 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { } void Translator::V_MED3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); } +void Translator::V_MED3_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2); + SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); +} + void Translator::V_FLOOR_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); } void Translator::V_SUB_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src0, src1)); } void Translator::V_RCP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecip(src0)); } void Translator::V_FMA_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; const IR::U1 result = [&] { switch (op) { case ConditionOp::F: @@ -183,6 +517,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { return ir.FPLessThanEqual(src0, src1); case ConditionOp::GE: return ir.FPGreaterThanEqual(src0, src1); + case ConditionOp::U: + return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1))); default: UNREACHABLE(); } @@ -204,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { } void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); } @@ -216,47 +552,54 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { } void Translator::V_RSQ_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } void Translator::V_SIN_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSin(src0)); } void Translator::V_LOG_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPLog2(src0)); } void Translator::V_EXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPExp2(src0)); } void Translator::V_SQRT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSqrt(src0)); } void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); } void Translator::V_MIN3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); } +void Translator::V_MIN3_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2))); +} + void Translator::V_MADMK_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 k{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 k{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, k, src1)); } @@ -265,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { } void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0], true)); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[1], true)); + SetDst(inst.dst[0], GetSrc(inst.src[1])); } void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[2], true)); + SetDst(inst.dst[0], GetSrc(inst.src[2])); } void Translator::V_CVT_U32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToU(32, src0)); } void Translator::V_SUBREV_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src1, src0)); } @@ -294,6 +637,24 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) { // TODO: Carry-out } +void Translator::V_MAD_U64_U32(const GcnInst& inst) { + const auto src0 = GetSrc(inst.src[0]); + const auto src1 = GetSrc(inst.src[1]); + const auto src2 = GetSrc64(inst.src[2]); + + // const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1)); + const IR::U64 mul_result = + ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U))); + const IR::U64 sum_result = ir.IAdd(mul_result, src2); + + SetDst64(inst.dst[0], sum_result); + + const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false); + const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false); + const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); + ir.SetVcc(did_overflow); +} + void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -349,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - const IR::U32 max{ir.IMax(src0, src1, false)}; - const IR::U32 min{ir.IMin(src0, src1, false)}; - SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2)); + IR::U32 result; + if (src0.IsImmediate() && src0.U32() == 0U) { + result = src1; + } else if (src1.IsImmediate() && src1.U32() == 0U) { + result = src0; + } else { + const IR::U32 max{ir.IMax(src0, src1, false)}; + const IR::U32 min{ir.IMin(src0, src1, false)}; + result = ir.ISub(max, min); + } + SetDst(inst.dst[0], ir.IAdd(result, src2)); } void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { @@ -394,12 +763,18 @@ void Translator::V_ASHRREV_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } +void Translator::V_ASHR_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); +} + void Translator::V_MAD_U32_U24(const GcnInst& inst) { V_MAD_I32_I24(inst, false); } void Translator::V_RNDNE_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRoundEven(src0)); } @@ -410,19 +785,26 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) { } void Translator::V_COS_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCos(src0)); } void Translator::V_MAX3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } +void Translator::V_MAX3_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2))); +} + void Translator::V_CVT_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); } @@ -439,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { } void Translator::V_TRUNC_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPTrunc(src0)); } void Translator::V_CEIL_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCeil(src0)); } @@ -508,49 +890,56 @@ void Translator::V_BFREV_B32(const GcnInst& inst) { } void Translator::V_LDEXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); } void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0))); } void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { - constexpr u32 SIGNALING_NAN = 1 << 0; - constexpr u32 QUIET_NAN = 1 << 1; - constexpr u32 NEGATIVE_INFINITY = 1 << 2; - constexpr u32 NEGATIVE_NORMAL = 1 << 3; - constexpr u32 NEGATIVE_DENORM = 1 << 4; - constexpr u32 NEGATIVE_ZERO = 1 << 5; - constexpr u32 POSITIVE_ZERO = 1 << 6; - constexpr u32 POSITIVE_DENORM = 1 << 7; - constexpr u32 POSITIVE_NORMAL = 1 << 8; - constexpr u32 POSITIVE_INFINITY = 1 << 9; - - const IR::F32F64 src0{GetSrc(inst.src[0])}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; + IR::U1 value; if (src1.IsImmediate()) { - const u32 class_mask = src1.U32(); - IR::U1 value; - if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) { + const auto class_mask = static_cast(src1.U32()); + if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) { value = ir.FPIsNan(src0); - } else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) == - (POSITIVE_INFINITY | NEGATIVE_INFINITY)) { + } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) { value = ir.FPIsInf(src0); } else { UNREACHABLE(); } - if (inst.dst[1].field == OperandField::VccLo) { - return ir.SetVcc(value); - } else { - UNREACHABLE(); - } } else { + // We don't know the type yet, delay its resolution. + value = ir.FPCmpClass32(src0, src1); + } + + switch (inst.dst[1].field) { + case OperandField::VccLo: + return ir.SetVcc(value); + default: UNREACHABLE(); } } +void Translator::V_FFBL_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FindILsb(src0)); +} + +void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + if (!is_low) { + ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U); + return; + } + ASSERT(src0.IsImmediate() && src0.U32() == ~0U); + SetDst(inst.dst[0], ir.LaneId()); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 55a2d624e..4ff846cf8 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -12,4 +12,24 @@ void Translator::V_INTERP_P2_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } +void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; + ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); +} + +void Translator::EmitVectorInterpolation(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::V_INTERP_P1_F32: + return; + case Opcode::V_INTERP_P2_F32: + return V_INTERP_P2_F32(inst); + case Opcode::V_INTERP_MOV_F32: + return V_INTERP_MOV_F32(inst); + default: + LogMissingOpcode(inst); + } +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index f4383c61d..63f6c3b47 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -5,9 +5,108 @@ namespace Shader::Gcn { +void Translator::EmitVectorMemory(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::IMAGE_SAMPLE_LZ_O: + case Opcode::IMAGE_SAMPLE_O: + case Opcode::IMAGE_SAMPLE_C: + case Opcode::IMAGE_SAMPLE_C_LZ: + case Opcode::IMAGE_SAMPLE_LZ: + case Opcode::IMAGE_SAMPLE: + case Opcode::IMAGE_SAMPLE_L: + case Opcode::IMAGE_SAMPLE_C_O: + case Opcode::IMAGE_SAMPLE_B: + case Opcode::IMAGE_SAMPLE_C_LZ_O: + return IMAGE_SAMPLE(inst); + case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_LZ_O: + return IMAGE_GATHER(inst); + case Opcode::IMAGE_ATOMIC_ADD: + return IMAGE_ATOMIC(AtomicOp::Add, inst); + case Opcode::IMAGE_ATOMIC_AND: + return IMAGE_ATOMIC(AtomicOp::And, inst); + case Opcode::IMAGE_ATOMIC_OR: + return IMAGE_ATOMIC(AtomicOp::Or, inst); + case Opcode::IMAGE_ATOMIC_XOR: + return IMAGE_ATOMIC(AtomicOp::Xor, inst); + case Opcode::IMAGE_ATOMIC_UMAX: + return IMAGE_ATOMIC(AtomicOp::Umax, inst); + case Opcode::IMAGE_ATOMIC_SMAX: + return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_UMIN: + return IMAGE_ATOMIC(AtomicOp::Umin, inst); + case Opcode::IMAGE_ATOMIC_SMIN: + return IMAGE_ATOMIC(AtomicOp::Smin, inst); + case Opcode::IMAGE_ATOMIC_INC: + return IMAGE_ATOMIC(AtomicOp::Inc, inst); + case Opcode::IMAGE_ATOMIC_DEC: + return IMAGE_ATOMIC(AtomicOp::Dec, inst); + case Opcode::IMAGE_GET_LOD: + return IMAGE_GET_LOD(inst); + case Opcode::IMAGE_STORE: + return IMAGE_STORE(inst); + case Opcode::IMAGE_LOAD_MIP: + return IMAGE_LOAD(true, inst); + case Opcode::IMAGE_LOAD: + return IMAGE_LOAD(false, inst); + case Opcode::IMAGE_GET_RESINFO: + return IMAGE_GET_RESINFO(inst); + + // Buffer load operations + case Opcode::TBUFFER_LOAD_FORMAT_X: + return BUFFER_LOAD_FORMAT(1, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XY: + return BUFFER_LOAD_FORMAT(2, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XYZ: + return BUFFER_LOAD_FORMAT(3, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XYZW: + return BUFFER_LOAD_FORMAT(4, true, true, inst); + + case Opcode::BUFFER_LOAD_FORMAT_X: + return BUFFER_LOAD_FORMAT(1, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XY: + return BUFFER_LOAD_FORMAT(2, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XYZ: + return BUFFER_LOAD_FORMAT(3, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XYZW: + return BUFFER_LOAD_FORMAT(4, false, true, inst); + + case Opcode::BUFFER_LOAD_DWORD: + return BUFFER_LOAD_FORMAT(1, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX2: + return BUFFER_LOAD_FORMAT(2, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX3: + return BUFFER_LOAD_FORMAT(3, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX4: + return BUFFER_LOAD_FORMAT(4, false, false, inst); + + // Buffer store operations + case Opcode::BUFFER_STORE_FORMAT_X: + return BUFFER_STORE_FORMAT(1, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XY: + return BUFFER_STORE_FORMAT(2, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XYZ: + return BUFFER_STORE_FORMAT(3, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XYZW: + return BUFFER_STORE_FORMAT(4, false, true, inst); + + case Opcode::BUFFER_STORE_DWORD: + return BUFFER_STORE_FORMAT(1, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX2: + return BUFFER_STORE_FORMAT(2, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX3: + return BUFFER_STORE_FORMAT(3, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX4: + return BUFFER_STORE_FORMAT(4, false, false, inst); + default: + LogMissingOpcode(inst); + } +} + void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::ScalarReg tsharp_reg{inst.src[2].code}; + const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const auto flags = ImageResFlags(inst.control.mimg.dmask); const bool has_mips = flags.test(ImageResComponent::MipCount); const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code)); @@ -48,8 +147,8 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction // Set Architecture - const IR::Value offset = - flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{}; + const IR::U32 offset = + flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::U32{}; const IR::F32 bias = flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; const IR::F32 dref = @@ -81,18 +180,17 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { - const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { if (explicit_lod) { - return ir.ImageSampleExplicitLod(handle, body, lod, offset, info); + return ir.ImageSampleExplicitLod(handle, body, offset, info); } else { - return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info); + return ir.ImageSampleImplicitLod(handle, body, bias, offset, info); } } if (explicit_lod) { - return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info); + return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info); } - return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info); + return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info); }(); for (u32 i = 0; i < 4; i++) { @@ -157,17 +255,17 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { info.has_bias.Assign(flags.test(MimgModifier::LodBias)); info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); info.force_level0.Assign(flags.test(MimgModifier::Level0)); - info.explicit_lod.Assign(explicit_lod); + // info.explicit_lod.Assign(explicit_lod); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { - return ir.ImageGather(handle, body, offset, {}, info); + return ir.ImageGather(handle, body, offset, info); } ASSERT(mimg.dmask & 1); // should be always 1st (R) component - return ir.ImageGatherDref(handle, body, offset, {}, dref, info); + return ir.ImageGatherDref(handle, body, offset, dref, info); }(); // For gather4 instructions dmask selects which component to read and must have @@ -251,6 +349,11 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma if (is_typed) { info.dmft.Assign(static_cast(mtbuf.dfmt)); info.nfmt.Assign(static_cast(mtbuf.nfmt)); + ASSERT(info.nfmt == AmdGpu::NumberFormat::Float && + (info.dmft == AmdGpu::DataFormat::Format32_32_32_32 || + info.dmft == AmdGpu::DataFormat::Format32_32_32 || + info.dmft == AmdGpu::DataFormat::Format32_32 || + info.dmft == AmdGpu::DataFormat::Format32)); } const IR::Value handle = @@ -268,7 +371,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma } } -void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, + const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; @@ -319,7 +423,11 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBuffer(num_dwords, handle, address, value, info); + if (is_format) { + ir.StoreBufferFormat(num_dwords, handle, address, value, info); + } else { + ir.StoreBuffer(num_dwords, handle, address, value, info); + } } void Translator::IMAGE_GET_LOD(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 3f95ff7ac..2c67411ff 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -105,7 +105,7 @@ struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - auto format(const Shader::IR::Attribute& attribute, format_context& ctx) const { + auto format(const Shader::IR::Attribute attribute, format_context& ctx) const { return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); } }; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 622a6249d..60ba0647a 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -9,7 +9,7 @@ namespace Shader::IR { -Block::Block(ObjectPool& inst_pool_) : inst_pool{&inst_pool_} {} +Block::Block(Common::ObjectPool& inst_pool_) : inst_pool{&inst_pool_} {} Block::~Block() = default; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 5a7036c62..1eb11469c 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -9,10 +9,10 @@ #include #include +#include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/value.h" -#include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -25,7 +25,7 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(ObjectPool& inst_pool_); + explicit Block(Common::ObjectPool& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -153,7 +153,7 @@ public: private: /// Memory pool for instruction list - ObjectPool* inst_pool; + Common::ObjectPool* inst_pool; /// List of instructions in this block InstructionList instructions; diff --git a/src/shader_recompiler/ir/breadth_first_search.h b/src/shader_recompiler/ir/breadth_first_search.h index 21a34a903..0156303f0 100644 --- a/src/shader_recompiler/ir/breadth_first_search.h +++ b/src/shader_recompiler/ir/breadth_first_search.h @@ -12,16 +12,16 @@ namespace Shader::IR { template -auto BreadthFirstSearch(const Value& value, Pred&& pred) - -> std::invoke_result_t { - if (value.IsImmediate()) { - // Nothing to do with immediates - return std::nullopt; +auto BreadthFirstSearch(const Inst* inst, Pred&& pred) -> std::invoke_result_t { + // Most often case the instruction is the desired already. + if (const std::optional result = pred(inst)) { + return result; } + // Breadth-first search visiting the right most arguments first boost::container::small_vector visited; std::queue queue; - queue.push(value.InstRecursive()); + queue.push(inst); while (!queue.empty()) { // Pop one instruction from the queue @@ -49,4 +49,14 @@ auto BreadthFirstSearch(const Value& value, Pred&& pred) return std::nullopt; } +template +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + return BreadthFirstSearch(value.InstRecursive(), pred); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/condition.h b/src/shader_recompiler/ir/condition.h index 4b60be674..da986c48d 100644 --- a/src/shader_recompiler/ir/condition.h +++ b/src/shader_recompiler/ir/condition.h @@ -44,7 +44,7 @@ constexpr std::string_view NameOf(Condition condition) { template <> struct fmt::formatter : formatter { - auto format(const Shader::IR::Condition& cond, format_context& ctx) const { + auto format(const Shader::IR::Condition cond, format_context& ctx) const { return formatter::format(NameOf(cond), ctx); } }; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index cd4fdaa29..4271ac359 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -2,26 +2,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/value.h" namespace Shader::IR { namespace { -[[noreturn]] void ThrowInvalidType(Type type) { - UNREACHABLE_MSG("Invalid type {}", u32(type)); -} - -Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { - if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { - return ir.CompositeConstruct(bias_lod, lod_clamp); - } else if (!bias_lod.IsEmpty()) { - return bias_lod; - } else if (!lod_clamp.IsEmpty()) { - return lod_clamp; - } else { - return Value{}; - } +[[noreturn]] void ThrowInvalidType(Type type, + std::source_location loc = std::source_location::current()) { + const std::string functionName = loc.function_name(); + const int lineNumber = loc.line(); + UNREACHABLE_MSG("Invalid type = {}, functionName = {}, line = {}", u32(type), functionName, + lineNumber); } } // Anonymous namespace @@ -266,14 +259,10 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { - case 8: - return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); - case 16: - return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); case 32: return Inst(Opcode::LoadSharedU32, offset); case 64: - return Inst(Opcode::LoadSharedU64, offset); + return Inst(Opcode::LoadSharedU64, offset); case 128: return Inst(Opcode::LoadSharedU128, offset); default: @@ -283,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { switch (bit_size) { - case 8: - Inst(Opcode::WriteSharedU8, offset, value); - break; - case 16: - Inst(Opcode::WriteSharedU16, offset, value); - break; case 32: Inst(Opcode::WriteSharedU32, offset, value); break; @@ -364,10 +347,34 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad } } +void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info) { + switch (num_dwords) { + case 1: + Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); + break; + case 2: + Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data); + break; + case 3: + Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data); + break; + case 4: + Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data); + break; + default: + UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); + } +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } +U32 IREmitter::WarpId() { + return Inst(Opcode::WarpId); +} + U32 IREmitter::QuadShuffle(const U32& value, const U32& index) { return Inst(Opcode::QuadShuffle, value, index); } @@ -871,6 +878,10 @@ U1 IREmitter::FPIsInf(const F32F64& value) { } } +U1 IREmitter::FPCmpClass32(const F32& value, const U32& op) { + return Inst(Opcode::FPCmpClass32, value, op); +} + U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); @@ -964,8 +975,18 @@ IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) { return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b); } -U32 IREmitter::IMul(const U32& a, const U32& b) { - return Inst(Opcode::IMul32, a, b); +U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::IMul32, a, b); + case Type::U64: + return Inst(Opcode::IMul64, a, b); + default: + ThrowInvalidType(a.Type()); + } } U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { @@ -1024,8 +1045,18 @@ U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { return Inst(Opcode::BitwiseAnd32, a, b); } -U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { - return Inst(Opcode::BitwiseOr32, a, b); +U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::BitwiseOr32, a, b); + case Type::U64: + return Inst(Opcode::BitwiseOr64, a, b); + default: + ThrowInvalidType(a.Type()); + } } U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { @@ -1063,6 +1094,10 @@ U32 IREmitter::FindUMsb(const U32& value) { return Inst(Opcode::FindUMsb32, value); } +U32 IREmitter::FindILsb(const U32& value) { + return Inst(Opcode::FindILsb32, value); +} + U32 IREmitter::SMin(const U32& a, const U32& b) { return Inst(Opcode::SMin32, a, b); } @@ -1095,8 +1130,18 @@ U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { return Inst(Opcode::UClamp32, value, min, max); } -U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +U1 IREmitter::ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SLessThan32 : Opcode::ULessThan32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SLessThan64 : Opcode::ULessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { @@ -1155,8 +1200,9 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) { ThrowInvalidType(value.Type()); } default: - UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize); + break; } + throw NotImplementedException("Invalid destination bitsize {}", bitsize); } U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) { @@ -1183,13 +1229,17 @@ F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Val switch (src_bitsize) { case 32: return Inst(Opcode::ConvertF32S32, value); + default: + break; } - break; case 64: switch (src_bitsize) { case 32: return Inst(Opcode::ConvertF64S32, value); + default: + break; } + default: break; } UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); @@ -1203,13 +1253,17 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val return Inst(Opcode::ConvertF32U16, value); case 32: return Inst(Opcode::ConvertF32U32, value); + default: + break; } - break; case 64: switch (src_bitsize) { case 32: return Inst(Opcode::ConvertF64U32, value); + default: + break; } + default: break; } UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); @@ -1227,7 +1281,16 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) { switch (value.Type()) { case Type::U32: return Inst(Opcode::ConvertU16U32, value); + default: + break; } + case 32: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertU32U16, value); + } + default: + break; } throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } @@ -1238,13 +1301,17 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { switch (value.Type()) { case Type::F32: return Inst(Opcode::ConvertF16F32, value); + default: + break; } - break; case 32: switch (value.Type()) { case Type::F16: return Inst(Opcode::ConvertF32F16, value); + default: + break; } + default: break; } throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); @@ -1317,41 +1384,37 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, - const Value& offset, const F32& lod_clamp, +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias, + const U32& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset); + return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset); } -Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, - const Value& offset, TextureInstInfo info) { - return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset); -} - -F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc, +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, + const F32& bias, const U32& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias, offset); } -F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& lod, const Value& offset, - TextureInstInfo info) { - return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref, + const U32& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{}, offset); } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info) { - return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2); + TextureInstInfo info) { + return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset); } Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { - return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref); + const F32& dref, TextureInstInfo info) { + return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index e7512430a..59ced93e3 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -93,8 +93,11 @@ public: BufferInstInfo info); void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); + void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info); [[nodiscard]] U32 LaneId(); + [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); @@ -150,6 +153,7 @@ public: [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPIsNan(const F32F64& value); [[nodiscard]] U1 FPIsInf(const F32F64& value); + [[nodiscard]] U1 FPCmpClass32(const F32& value, const U32& op); [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false); @@ -159,7 +163,7 @@ public: [[nodiscard]] Value IAddCary(const U32& a, const U32& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false); - [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32 IAbs(const U32& value); @@ -167,7 +171,7 @@ public: [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); - [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b); [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, const U32& count); @@ -179,6 +183,7 @@ public: [[nodiscard]] U32 FindSMsb(const U32& value); [[nodiscard]] U32 FindUMsb(const U32& value); + [[nodiscard]] U32 FindILsb(const U32& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); @@ -188,7 +193,7 @@ public: [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); - [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); @@ -238,31 +243,32 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); - [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, - const F32& lod, const Value& offset, + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, + const F32& bias, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, const F32& bias, - const Value& offset, const F32& lod_clamp, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body, + const F32& dref, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, - const F32& dref, const F32& lod, - const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips, TextureInstInfo info); + + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info); + TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, - const Value& offset, const Value& offset2, const F32& dref, - TextureInstInfo info); + const Value& offset, const F32& dref, TextureInstInfo info); [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index aa03e3d6e..a8166125e 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -55,12 +55,14 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StoreBufferF32x2: case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x4: + case Opcode::StoreBufferFormatF32: + case Opcode::StoreBufferFormatF32x2: + case Opcode::StoreBufferFormatF32x3: + case Opcode::StoreBufferFormatF32x4: case Opcode::StoreBufferU32: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: - case Opcode::WriteSharedU16: - case Opcode::WriteSharedU8: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 9aefc8b39..4c6122a83 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -26,15 +26,9 @@ OPCODE(WorkgroupMemoryBarrier, Void, OPCODE(DeviceMemoryBarrier, Void, ) // Shared memory operations -OPCODE(LoadSharedU8, U32, U32, ) -OPCODE(LoadSharedS8, U32, U32, ) -OPCODE(LoadSharedU16, U32, U32, ) -OPCODE(LoadSharedS16, U32, U32, ) OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU64, U32x2, U32, ) OPCODE(LoadSharedU128, U32x4, U32, ) -OPCODE(WriteSharedU8, Void, U32, U32, ) -OPCODE(WriteSharedU16, Void, U32, U32, ) OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU128, Void, U32, U32x4, ) @@ -88,6 +82,10 @@ OPCODE(StoreBufferF32, Void, Opaq OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, ) +OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, ) +OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, ) +OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) // Vector utility @@ -219,6 +217,7 @@ OPCODE(FPIsNan32, U1, F32, OPCODE(FPIsNan64, U1, F64, ) OPCODE(FPIsInf32, U1, F32, ) OPCODE(FPIsInf64, U1, F64, ) +OPCODE(FPCmpClass32, U1, F32, U32 ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) @@ -227,6 +226,7 @@ OPCODE(IAddCary32, U32x2, U32, OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) +OPCODE(IMul64, U64, U64, U64, ) OPCODE(SMulExt, U32x2, U32, U32, ) OPCODE(UMulExt, U32x2, U32, U32, ) OPCODE(SDiv32, U32, U32, U32, ) @@ -242,6 +242,7 @@ OPCODE(ShiftRightArithmetic32, U32, U32, OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) OPCODE(BitwiseAnd32, U32, U32, U32, ) OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseOr64, U64, U64, U64, ) OPCODE(BitwiseXor32, U32, U32, U32, ) OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) @@ -252,14 +253,17 @@ OPCODE(BitwiseNot32, U32, U32, OPCODE(FindSMsb32, U32, U32, ) OPCODE(FindUMsb32, U32, U32, ) +OPCODE(FindILsb32, U32, U32, ) OPCODE(SMin32, U32, U32, U32, ) OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) OPCODE(UMax32, U32, U32, U32, ) OPCODE(SClamp32, U32, U32, U32, U32, ) OPCODE(UClamp32, U32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(SLessThan32, U1, U32, U32, ) +OPCODE(SLessThan64, U1, U64, U64, ) +OPCODE(ULessThan32, U1, U32, U32, ) +OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(IEqual, U1, U32, U32, ) OPCODE(SLessThanEqual, U1, U32, U32, ) OPCODE(ULessThanEqual, U1, U32, U32, ) @@ -289,14 +293,15 @@ OPCODE(ConvertF64S32, F64, U32, OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF32U16, F32, U16, ) OPCODE(ConvertU16U32, U16, U32, ) +OPCODE(ConvertU32U16, U32, U16, ) // Image operations -OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) @@ -319,4 +324,5 @@ OPCODE(ImageAtomicExchange32, U32, Opaq // Warp operations OPCODE(LaneId, U32, ) +OPCODE(WarpId, U32, ) OPCODE(QuadShuffle, U32, U32, U32 ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index 7cd896fbd..94218b32f 100644 --- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -21,6 +21,8 @@ template return value.F32(); } else if constexpr (std::is_same_v) { return value.U64(); + } else if constexpr (std::is_same_v) { + return static_cast(value.U64()); } } @@ -236,6 +238,18 @@ void FoldBooleanConvert(IR::Inst& inst) { } } +void FoldCmpClass(IR::Inst& inst) { + ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); + const auto class_mask = static_cast(inst.Arg(1).U32()); + if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) { + inst.ReplaceOpcode(IR::Opcode::FPIsNan32); + } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) { + inst.ReplaceOpcode(IR::Opcode::FPIsInf32); + } else { + UNREACHABLE(); + } +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::IAdd32: @@ -249,6 +263,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::IMul32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); return; + case IR::Opcode::FPCmpClass32: + FoldCmpClass(inst); + return; case IR::Opcode::ShiftRightArithmetic32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast(a >> b); }); return; @@ -281,12 +298,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); - case IR::Opcode::SLessThan: + case IR::Opcode::SLessThan32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); return; - case IR::Opcode::ULessThan: + case IR::Opcode::SLessThan64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a < b; }); + return; + case IR::Opcode::ULessThan32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); return; + case IR::Opcode::ULessThan64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; }); + return; case IR::Opcode::SLessThanEqual: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); return; diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index bf2ba4d66..7e2b962b5 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); +void LowerSharedMemToRegisters(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp new file mode 100644 index 000000000..a87cf31b1 --- /dev/null +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void LowerSharedMemToRegisters(IR::Program& program) { + boost::container::small_vector ds_writes; + Info& info{program.info}; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = inst.GetOpcode(); + if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) { + ds_writes.emplace_back(&inst); + continue; + } + if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) { + // Search for write instruction with same offset + const IR::Inst* prod = inst.Arg(0).InstRecursive(); + const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) { + const IR::Inst* write_prod = write->Arg(0).InstRecursive(); + return write_prod->Arg(1).U32() == prod->Arg(1).U32() && + write_prod->Arg(0) == prod->Arg(0); + }); + ASSERT(it != ds_writes.end()); + // Replace data read with value written. + inst.ReplaceUsesWith((*it)->Arg(1)); + } + } + } + // We should have eliminated everything. Invalidate data write instructions. + for (const auto inst : ds_writes) { + inst->Invalidate(); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 6526ece62..97fc5b999 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" @@ -38,6 +37,10 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: return true; default: @@ -74,6 +77,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferFormatF32x2: case IR::Opcode::LoadBufferFormatF32x3: case IR::Opcode::LoadBufferFormatF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: switch (num_format) { case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: @@ -113,6 +120,10 @@ bool IsBufferStore(const IR::Inst& inst) { case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: return true; default: @@ -172,12 +183,27 @@ bool IsImageStorageInstruction(const IR::Inst& inst) { } } +u32 ImageOffsetArgumentPosition(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + return 2; + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + return 3; + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + return 4; + default: + UNREACHABLE(); + } +} + class Descriptors { public: - explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_, - SamplerResourceList& sampler_resources_) - : buffer_resources{buffer_resources_}, image_resources{image_resources_}, - sampler_resources{sampler_resources_} {} + explicit Descriptors(Info& info_) + : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images}, + sampler_resources{info_.samplers} {} u32 Add(const BufferResource& desc) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { @@ -189,6 +215,7 @@ public: ASSERT(buffer.length == desc.length); buffer.is_storage |= desc.is_storage; buffer.used_types |= desc.used_types; + buffer.is_written |= desc.is_written; return index; } @@ -202,9 +229,16 @@ public: } u32 Add(const SamplerResource& desc) { - const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) { - return desc.sgpr_base == existing.sgpr_base && - desc.dword_offset == existing.dword_offset; + const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { + if (desc.sgpr_base == existing.sgpr_base && + desc.dword_offset == existing.dword_offset) { + return true; + } + // Samplers with different bindings might still be the same. + const auto old_sharp = + info.ReadUd(existing.sgpr_base, existing.dword_offset); + const auto new_sharp = info.ReadUd(desc.sgpr_base, desc.dword_offset); + return old_sharp == new_sharp; })}; return index; } @@ -220,6 +254,7 @@ private: return static_cast(descriptors.size()) - 1; } + const Info& info; BufferResourceList& buffer_resources; ImageResourceList& image_resources; SamplerResourceList& sampler_resources; @@ -273,9 +308,18 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { } SharpLocation TrackSharp(const IR::Inst* inst) { - while (inst->GetOpcode() == IR::Opcode::Phi) { - inst = inst->Arg(0).InstRecursive(); - } + // Search until we find a potential sharp source. + const auto pred0 = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData || + inst->GetOpcode() == IR::Opcode::ReadConst) { + return inst; + } + return std::nullopt; + }; + const auto result = IR::BreadthFirstSearch(inst, pred0); + ASSERT_MSG(result, "Unable to track sharp source"); + inst = result.value(); + // If its from user data not much else to do. if (inst->GetOpcode() == IR::Opcode::GetUserData) { return SharpLocation{ .sgpr_base = u32(IR::ScalarReg::Max), @@ -289,14 +333,14 @@ SharpLocation TrackSharp(const IR::Inst* inst) { const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive(); // Retrieve SGPR pair that holds sbase - const auto pred = [](const IR::Inst* inst) -> std::optional { + const auto pred1 = [](const IR::Inst* inst) -> std::optional { if (inst->GetOpcode() == IR::Opcode::GetUserData) { return inst->Arg(0).ScalarReg(); } return std::nullopt; }; - const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred); - const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred); + const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred1); + const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred1); ASSERT_MSG(base0 && base1, "Nested resource loads not supported"); // Return retrieved location. @@ -320,16 +364,6 @@ static bool IsLoadBufferFormat(const IR::Inst& inst) { } } -static bool IsReadConstBuffer(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::ReadConstBuffer: - case IR::Opcode::ReadConstBufferU32: - return true; - default: - return false; - } -} - static u32 BufferLength(const AmdGpu::Buffer& buffer) { const auto stride = buffer.GetStride(); if (stride < sizeof(f32)) { @@ -370,9 +404,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return -1; } // We have found this pattern. Build the sharp. - std::array buffer; + std::array buffer; buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); - buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32; + buffer[1] = 0; + buffer[2] = handle->Arg(2).U32(); + buffer[3] = handle->Arg(3).U32(); cbuf = std::bit_cast(buffer); // Assign a binding to this sharp. return descriptors.Add(BufferResource{ @@ -393,30 +429,37 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::Inst* handle = inst.Arg(0).InstRecursive(); IR::Inst* producer = handle->Arg(0).InstRecursive(); const auto sharp = TrackSharp(producer); + const bool is_store = IsBufferStore(inst); buffer = info.ReadUd(sharp.sgpr_base, sharp.dword_offset); binding = descriptors.Add(BufferResource{ .sgpr_base = sharp.sgpr_base, .dword_offset = sharp.dword_offset, .length = BufferLength(buffer), .used_types = BufferDataType(inst, buffer.GetNumberFmt()), - .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize, + .is_storage = is_store || buffer.GetSize() > MaxUboSize, + .is_written = is_store, }); } + // Update buffer descriptor format. const auto inst_info = inst.Flags(); - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - // Replace handle with binding index in buffer resource list. - inst.SetArg(0, ir.Imm32(binding)); - ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + auto& buffer_desc = info.buffers[binding]; if (inst_info.is_typed) { - ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float && - (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 || - inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 || - inst_info.dmft == AmdGpu::DataFormat::Format32_32 || - inst_info.dmft == AmdGpu::DataFormat::Format32)); + buffer_desc.dfmt = inst_info.dmft; + buffer_desc.nfmt = inst_info.nfmt; + } else { + buffer_desc.dfmt = buffer.GetDataFmt(); + buffer_desc.nfmt = buffer.GetNumberFmt(); } - if (IsReadConstBuffer(inst)) { + // Replace handle with binding index in buffer resource list. + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(binding)); + ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + + // Address of constant buffer reads can be calculated at IR emittion time. + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || + inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { return; } @@ -426,10 +469,14 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } } else { const u32 stride = buffer.GetStride(); - ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}", - stride); + if (stride < 4) { + LOG_WARNING(Render_Vulkan, + "non-formatting load_buffer_* is not implemented for stride {}", stride); + } } + // Compute address of the buffer using the stride. + // Todo: What if buffer is rebound with different stride? IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); if (inst_info.index_enable) { const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)} @@ -456,36 +503,33 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - std::deque insts{&inst}; - const auto& pred = [](auto opcode) -> bool { - return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) - opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) - opcode == IR::Opcode::GetUserData); + const auto pred = [](const IR::Inst* inst) -> std::optional { + const auto opcode = inst->GetOpcode(); + if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) + opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) + opcode == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; }; - - IR::Inst* producer{}; - while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) { - for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) { - const auto arg = producer->Arg(arg_idx); - if (arg.TryInstRecursive()) { - insts.push_back(arg.InstRecursive()); - } - } - insts.pop_front(); - } - ASSERT(pred(producer->GetOpcode())); - auto [tsharp_handle, ssharp_handle] = [&] -> std::pair { - if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) { - return std::make_pair(producer->Arg(0).InstRecursive(), - producer->Arg(1).InstRecursive()); - } - return std::make_pair(producer, nullptr); - }(); + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to find image sharp source"); + const IR::Inst* producer = result.value(); + const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2; + const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer; // Read image sharp. const auto tsharp = TrackSharp(tsharp_handle); const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + if (!image.Valid()) { + LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith( + ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f))); + return; + } + ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, @@ -496,17 +540,32 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }); // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - if (ssharp_handle) { + const u32 sampler_binding = [&] { + if (!has_sampler) { + return 0U; + } + const IR::Value& handle = producer->Arg(1); + // Inline sampler resource. + if (handle.IsImmediate()) { + LOG_WARNING(Render_Vulkan, "Inline sampler detected"); + return descriptors.Add(SamplerResource{ + .sgpr_base = std::numeric_limits::max(), + .dword_offset = 0, + .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + }); + } + // Normal sampler resource. + const auto ssharp_handle = handle.InstRecursive(); const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); const auto ssharp = TrackSharp(ssharp_ud); - const u32 sampler_binding = descriptors.Add(SamplerResource{ + return descriptors.Add(SamplerResource{ .sgpr_base = ssharp.sgpr_base, .dword_offset = ssharp.dword_offset, .associated_image = image_binding, .disable_aniso = disable_aniso, }); - image_binding |= (sampler_binding << 16); - } + }(); + image_binding |= (sampler_binding << 16); // Patch image handle IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -543,25 +602,43 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_offset) { // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. - const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather || - inst.GetOpcode() == IR::Opcode::ImageGatherDref; - const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3); + const u32 arg_pos = ImageOffsetArgumentPosition(inst); const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); }; - union { - u32 raw; - BitField<0, 6, u32> x; - BitField<8, 6, u32> y; - BitField<16, 6, u32> z; - } offset{arg.U32()}; - const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y)); - inst.SetArg(arg_pos, value); + + const auto read = [&](u32 offset) -> auto { + return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); + }; + + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + inst.SetArg(arg_pos, read(0)); + break; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DArray: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8))); + break; + case AmdGpu::ImageType::Color3D: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16))); + break; + default: + UNREACHABLE(); + } } if (inst_info.has_lod_clamp) { - // Final argument contains lod_clamp - const u32 arg_pos = inst_info.is_depth ? 5 : 4; + const u32 arg_pos = [&]() -> u32 { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageSampleImplicitLod: + return 2; + case IR::Opcode::ImageSampleDrefImplicitLod: + return 3; + default: + break; + } + return inst_info.is_depth ? 5 : 4; + }(); inst.SetArg(arg_pos, arg); } if (inst_info.explicit_lod) { @@ -569,45 +646,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod || inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod); const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3; - inst.SetArg(pos, arg); + const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg; + inst.SetArg(pos, value); } } void ResourceTrackingPass(IR::Program& program) { - // When loading data from untyped buffer we don't have if it is float or integer. - // Most of the time it is float so that is the default. This pass detects float buffer loads - // combined with bitcasts and patches them to be integer loads. - for (IR::Block* const block : program.post_order_blocks) { - break; - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) { - continue; - } - // Replace the bitcast with a typed buffer read - IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()}; - if (!arg_inst) { - continue; - } - const auto replace{[&](IR::Opcode new_opcode) { - inst.ReplaceOpcode(new_opcode); - inst.SetArg(0, arg_inst->Arg(0)); - inst.SetArg(1, arg_inst->Arg(1)); - inst.SetFlags(arg_inst->Flags()); - arg_inst->Invalidate(); - }}; - if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) { - replace(IR::Opcode::ReadConstBufferU32); - } - if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) { - replace(IR::Opcode::LoadBufferU32); - } - } - } - // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; - Descriptors descriptors{info.buffers, info.images, info.samplers}; - for (IR::Block* const block : program.post_order_blocks) { + Descriptors descriptors{info}; + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (IsBufferInstruction(inst)) { PatchBufferInstruction(*block, inst, info, descriptors); diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index b51ce94ee..52087a653 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -16,15 +16,11 @@ void Visit(Info& info, IR::Inst& inst) { info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); break; } - case IR::Opcode::LoadSharedS8: - case IR::Opcode::LoadSharedU8: - case IR::Opcode::WriteSharedU8: - info.uses_shared_u8 = true; - break; - case IR::Opcode::LoadSharedS16: - case IR::Opcode::LoadSharedU16: - case IR::Opcode::WriteSharedU16: - info.uses_shared_u16 = true; + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + info.uses_shared = true; break; case IR::Opcode::ConvertF32F16: case IR::Opcode::BitCastF16U16: diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 6a43ad6be..eef73a659 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -129,19 +129,19 @@ IR::Opcode UndefOpcode(IR::VectorReg) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const VccLoTag&) noexcept { +IR::Opcode UndefOpcode(const VccLoTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const SccLoTag&) noexcept { +IR::Opcode UndefOpcode(const SccLoTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const VccHiTag&) noexcept { +IR::Opcode UndefOpcode(const VccHiTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const FlagTag&) noexcept { +IR::Opcode UndefOpcode(const FlagTag) noexcept { return IR::Opcode::UndefU1; } @@ -348,13 +348,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetThreadBitScalarReg: case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; - inst.ReplaceUsesWith( - pass.ReadVariable(reg, block, opcode == IR::Opcode::GetThreadBitScalarReg)); + const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg; + const IR::Value value = pass.ReadVariable(reg, block, thread_bit); + inst.ReplaceUsesWith(value); break; } case IR::Opcode::GetVectorRegister: { const IR::VectorReg reg{inst.Arg(0).VectorReg()}; - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + const IR::Value value = pass.ReadVariable(reg, block); + inst.ReplaceUsesWith(value); break; } case IR::Opcode::GetGotoVariable: diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index d9e9b0307..e3d04260b 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/bit_field.h" +#include "common/enum.h" #include "common/types.h" #include "video_core/amdgpu/pixel_format.h" @@ -24,6 +25,23 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class FloatClassFunc : u32 { + SignalingNan = 1 << 0, + QuietNan = 1 << 1, + NegativeInfinity = 1 << 2, + NegativeNormal = 1 << 3, + NegativeDenorm = 1 << 4, + NegativeZero = 1 << 5, + PositiveZero = 1 << 6, + PositiveDenorm = 1 << 7, + PositiveNormal = 1 << 8, + PositiveInfinity = 1 << 9, + + NaN = SignalingNan | QuietNan, + Infinity = PositiveInfinity | NegativeInfinity, +}; +DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc) + union Mode { BitField<0, 4, FpRoundMode> fp_round; BitField<4, 2, FpDenormMode> fp_denorm_single; diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index a43c17f5b..db939eaa5 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -220,6 +220,7 @@ using F16 = TypedValue; using F32 = TypedValue; using F64 = TypedValue; using U32F32 = TypedValue; +using U64F64 = TypedValue; using U32U64 = TypedValue; using U16U32U64 = TypedValue; using F32F64 = TypedValue; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 54b347300..badd54554 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -9,6 +9,7 @@ namespace Shader { struct Profile { u32 supported_spirv{0x00010000}; + u32 subgroup_size{}; bool unified_descriptor_binding{}; bool support_descriptor_aliasing{}; bool support_int8{}; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index f2834abf1..0f9fd6d41 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -27,8 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span token, const Info&& info) { +IR::Program TranslateProgram(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, std::span token, + const Info&& info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -44,12 +45,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool gcn_block_pool{64}; + Common::ObjectPool gcn_block_pool{64}; Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. program.info = std::move(info); - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); + program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); @@ -57,12 +58,13 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, - ObjectPool& block_pool, - std::span code, const Info&& info); +struct Profile; + +[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, + std::span code, const Info&& info, + const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 8824e344b..674d7c5f9 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -77,8 +77,11 @@ struct BufferResource { u32 length; IR::Type used_types; AmdGpu::Buffer inline_cbuf; - bool is_storage{false}; - bool is_instance_data{false}; + AmdGpu::DataFormat dfmt; + AmdGpu::NumberFormat nfmt; + bool is_storage{}; + bool is_instance_data{}; + bool is_written{}; constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; }; @@ -97,11 +100,27 @@ using ImageResourceList = boost::container::static_vector; struct SamplerResource { u32 sgpr_base; u32 dword_offset; + AmdGpu::Sampler inline_sampler{}; u32 associated_image : 4; u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept; }; using SamplerResourceList = boost::container::static_vector; +struct PushData { + static constexpr size_t BufOffsetIndex = 2; + + u32 step0; + u32 step1; + std::array buf_offsets; + + void AddOffset(u32 binding, u32 offset) { + ASSERT(offset < 256 && binding < buf_offsets.size()); + buf_offsets[binding] = offset; + } +}; + struct Info { struct VsInput { enum InstanceIdType : u8 { @@ -175,9 +194,9 @@ struct Info { bool has_image_gather{}; bool has_image_query{}; bool uses_group_quad{}; - bool uses_shared_u8{}; - bool uses_shared_u16{}; + bool uses_shared{}; bool uses_fp16{}; + bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions template @@ -196,6 +215,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); } +constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept { + return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); +} + } // namespace Shader template <> @@ -203,7 +226,7 @@ struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - auto format(const Shader::Stage& stage, format_context& ctx) const { + auto format(const Shader::Stage stage, format_context& ctx) const { constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index ab7ad2415..dce2d4b42 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -5,8 +5,10 @@ #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" +#include "video_core/renderdoc.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" namespace AmdGpu { @@ -32,15 +34,34 @@ void Liverpool::Process(std::stop_token stoken) { while (!stoken.stop_requested()) { { std::unique_lock lk{submit_mutex}; - Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; }); + Common::CondvarWait(submit_cv, lk, stoken, + [this] { return num_commands || num_submits || submit_done; }); } if (stoken.stop_requested()) { break; } + VideoCore::StartCapture(); + int qid = -1; - while (num_submits) { + while (num_submits || num_commands) { + + // Process incoming commands with high priority + while (num_commands) { + + Common::UniqueFunction callback{}; + { + std::unique_lock lk{submit_mutex}; + callback = std::move(command_queue.back()); + command_queue.pop(); + } + + callback(); + + --num_commands; + } + qid = (qid + 1) % NumTotalQueues; auto& queue = mapped_queues[qid]; @@ -48,11 +69,9 @@ void Liverpool::Process(std::stop_token stoken) { Task::Handle task{}; { std::scoped_lock lock{queue.m_access}; - if (queue.submits.empty()) { continue; } - task = queue.submits.front(); } task.resume(); @@ -64,9 +83,20 @@ void Liverpool::Process(std::stop_token stoken) { queue.submits.pop(); --num_submits; + std::scoped_lock lock2{submit_mutex}; + submit_cv.notify_all(); } } + if (submit_done) { + VideoCore::EndCapture(); + + if (rasterizer) { + rasterizer->Flush(); + } + submit_done = false; + } + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle); } } @@ -166,6 +196,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanSignal(Platform::InterruptId::GfxFlip); break; } + case PM4CmdNop::PayloadType::DebugMarkerPush: { + const auto marker_sz = nop->header.count.Value() * 2; + const std::string_view label{reinterpret_cast(&nop->data_block[1]), + marker_sz}; + rasterizer->ScopeMarkerBegin(label); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPop: { + rasterizer->ScopeMarkerEnd(); + break; + } default: break; } @@ -194,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::spantype3.count; - if (nop_offset == 0x0e || nop_offset == 0x0d) { + if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { ASSERT_MSG(payload[nop_offset] == 0xc0001000, "NOP hint is missing in CB setup sequence"); last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; @@ -281,8 +322,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndex2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true); rasterizer->ScopeMarkerEnd(); } @@ -294,8 +336,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin(fmt::format( - "dcb:{}:DrawIndexOffset2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true, draw_index_off->index_offset); rasterizer->ScopeMarkerEnd(); } @@ -306,8 +349,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(false); rasterizer->ScopeMarkerEnd(); } @@ -320,8 +364,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:Dispatch", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -365,8 +410,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); const u32 data_size = (header->type3.count.Value() - 2) * 4; + u64* address = write_data->Address(); if (!write_data->wr_one_addr.Value()) { - std::memcpy(write_data->Address(), write_data->data, data_size); + std::memcpy(address, write_data->data, data_size); } else { UNREACHABLE(); } @@ -378,11 +424,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // Optimization: VO label waits are special because the emulator + // will write to the label when presentation is finished. So if + // there are no other submits to yield to we can sleep the thread + // instead and allow other tasks to run. + const u64* wait_addr = wait_reg_mem->Address(); + if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); + } while (!wait_reg_mem->Test()) { + mapped_queues[GfxQueueId].cs_state = regs.cs_program; TracyFiberLeave; co_yield {}; TracyFiberEnter(dcb_task_name); + regs.cs_program = mapped_queues[GfxQueueId].cs_state; } break; } @@ -461,8 +517,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin(fmt::format( - "acb[{}]:{}:Dispatch", vqid, reinterpret_cast(acb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -483,9 +540,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { + mapped_queues[vqid].cs_state = regs.cs_program; TracyFiberLeave; co_yield {}; TracyFiberEnter(acb_task_name); + regs.cs_program = mapped_queues[vqid].cs_state; } break; } @@ -506,12 +565,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { - static constexpr u32 GfxQueueId = 0u; auto& queue = mapped_queues[GfxQueueId]; auto task = ProcessGraphics(dcb, ccb); { - std::unique_lock lock{queue.m_access}; + std::scoped_lock lock{queue.m_access}; queue.submits.emplace(task.handle); } @@ -526,7 +584,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { const auto& task = ProcessCompute(acb, vqid); { - std::unique_lock lock{queue.m_access}; + std::scoped_lock lock{queue.m_access}; queue.submits.emplace(task.handle); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index b87c80ed9..706da8ec5 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,14 +6,17 @@ #include #include #include +#include #include #include #include #include + #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" #include "common/types.h" +#include "common/unique_function.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -21,6 +24,10 @@ namespace Vulkan { class Rasterizer; } +namespace Libraries::VideoOut { +struct VideoOutPort; +} + namespace AmdGpu { #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) @@ -31,6 +38,7 @@ namespace AmdGpu { [[maybe_unused]] std::array CONCAT2(pad, __LINE__) struct Liverpool { + static constexpr u32 GfxQueueId = 0u; static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; @@ -372,9 +380,13 @@ struct Liverpool { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } + u32 NumBits() const { + return z_info.format == ZFormat::Z32Float ? 32 : 16; + } + size_t GetDepthSliceSize() const { ASSERT(z_info.format != ZFormat::Invalid); - const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2; + const auto bpe = NumBits() >> 3; // in bytes return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); } }; @@ -486,7 +498,7 @@ struct Liverpool { template T Address() const { - return reinterpret_cast((base_addr_lo & ~1U) | u64(base_addr_hi) << 32); + return std::bit_cast((base_addr_lo & ~1U) | u64(base_addr_hi) << 32); } }; @@ -756,7 +768,8 @@ struct Liverpool { } TilingMode GetTilingMode() const { - return attrib.tile_mode_index; + return info.linear_general ? TilingMode::Display_Linear + : attrib.tile_mode_index.Value(); } bool IsTiled() const { @@ -856,6 +869,33 @@ struct Liverpool { } }; + union ShaderStageEnable { + u32 raw; + BitField<0, 2, u32> ls_en; + BitField<2, 1, u32> hs_en; + BitField<3, 2, u32> es_en; + BitField<5, 1, u32> gs_en; + BitField<6, 1, u32> vs_en; + + bool IsStageEnabled(u32 stage) { + switch (stage) { + case 0: + case 1: + return true; + case 2: + return gs_en.Value(); + case 3: + return es_en.Value(); + case 4: + return hs_en.Value(); + case 5: + return ls_en.Value(); + default: + UNREACHABLE(); + } + } + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -892,7 +932,9 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2); std::array viewport_scissors; std::array viewport_depths; - INSERT_PADDING_WORDS(0xA105 - 0xA0D4); + INSERT_PADDING_WORDS(0xA103 - 0xA0D4); + u32 primitive_reset_index; + INSERT_PADDING_WORDS(1); BlendConstants blend_constants; INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4); StencilControl stencil_control; @@ -934,7 +976,9 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1); + INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1); + ShaderStageEnable stage_enable; + INSERT_PADDING_WORDS(9); PolygonOffset poly_offset; INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); AaConfig aa_config; @@ -991,14 +1035,36 @@ public: void SubmitGfx(std::span dcb, std::span ccb); void SubmitAsc(u32 vqid, std::span acb); + void SubmitDone() noexcept { + std::scoped_lock lk{submit_mutex}; + submit_done = true; + submit_cv.notify_one(); + } + + void WaitGpuIdle() noexcept { + std::unique_lock lk{submit_mutex}; + submit_cv.wait(lk, [this] { return num_submits == 0; }); + } + bool IsGpuIdle() const { return num_submits == 0; } + void SetVoPort(Libraries::VideoOut::VideoOutPort* port) { + vo_port = port; + } + void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } + void SendCommand(Common::UniqueFunction&& func) { + std::scoped_lock lk{submit_mutex}; + command_queue.emplace(std::move(func)); + ++num_commands; + submit_cv.notify_one(); + } + private: struct Task { struct promise_type { @@ -1015,7 +1081,11 @@ private: return {}; } void unhandled_exception() { - UNREACHABLE(); + try { + std::rethrow_exception(std::current_exception()); + } catch (const std::exception& e) { + UNREACHABLE_MSG("Unhandled exception: {}", e.what()); + } } void return_void() {} struct empty {}; @@ -1037,6 +1107,7 @@ private: struct GpuQueue { std::mutex m_access{}; std::queue submits{}; + ComputeProgram cs_state{}; }; std::array mapped_queues{}; @@ -1059,10 +1130,14 @@ private: } cblock{}; Vulkan::Rasterizer* rasterizer{}; + Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic num_commands{}; + std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; + std::queue> command_queue{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); @@ -1085,6 +1160,7 @@ static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); +static_assert(GFX6_3D_REG_INDEX(primitive_reset_index) == 0xA103); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); @@ -1107,6 +1183,7 @@ static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 6618e72ad..6744891a6 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -7,6 +7,77 @@ namespace AmdGpu { +std::string_view NameOf(DataFormat fmt) { + switch (fmt) { + case DataFormat::FormatInvalid: + return "FormatInvalid"; + case DataFormat::Format8: + return "Format8"; + case DataFormat::Format16: + return "Format16"; + case DataFormat::Format8_8: + return "Format8_8"; + case DataFormat::Format32: + return "Format32"; + case DataFormat::Format16_16: + return "Format16_16"; + case DataFormat::Format10_11_11: + return "Format10_11_11"; + case DataFormat::Format11_11_10: + return "Format11_11_10"; + case DataFormat::Format10_10_10_2: + return "Format10_10_10_2"; + case DataFormat::Format2_10_10_10: + return "Format2_10_10_10"; + case DataFormat::Format8_8_8_8: + return "Format8_8_8_8"; + case DataFormat::Format32_32: + return "Format32_32"; + case DataFormat::Format16_16_16_16: + return "Format16_16_16_16"; + case DataFormat::Format32_32_32: + return "Format32_32_32"; + case DataFormat::Format32_32_32_32: + return "Format32_32_32_32"; + case DataFormat::Format5_6_5: + return "Format5_6_5"; + case DataFormat::Format1_5_5_5: + return "Format1_5_5_5"; + case DataFormat::Format5_5_5_1: + return "Format5_5_5_1"; + case DataFormat::Format4_4_4_4: + return "Format4_4_4_4"; + case DataFormat::Format8_24: + return "Format8_24"; + case DataFormat::Format24_8: + return "Format24_8"; + case DataFormat::FormatX24_8_32: + return "FormatX24_8_32"; + case DataFormat::FormatGB_GR: + return "FormatGB_GR"; + case DataFormat::FormatBG_RG: + return "FormatBG_RG"; + case DataFormat::Format5_9_9_9: + return "Format5_9_9_9"; + case DataFormat::FormatBc1: + return "FormatBc1"; + case DataFormat::FormatBc2: + return "FormatBc2"; + case DataFormat::FormatBc3: + return "FormatBc3"; + case DataFormat::FormatBc4: + return "FormatBc4"; + case DataFormat::FormatBc5: + return "FormatBc5"; + case DataFormat::FormatBc6: + return "FormatBc6"; + case DataFormat::FormatBc7: + return "FormatBc7"; + default: + UNREACHABLE(); + } +} + std::string_view NameOf(NumberFormat fmt) { switch (fmt) { case NumberFormat::Unorm: diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 2a38c5a02..1004ed7d2 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -61,6 +61,7 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; +[[nodiscard]] std::string_view NameOf(DataFormat fmt); [[nodiscard]] std::string_view NameOf(NumberFormat fmt); int NumComponents(DataFormat format); @@ -70,6 +71,16 @@ s32 ComponentOffset(DataFormat format, u32 comp); } // namespace AmdGpu +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::DataFormat fmt, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); + } +}; + template <> struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index eded2de37..5ab233fdc 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -282,6 +282,13 @@ enum class InterruptSelect : u32 { IrqUndocumented = 3, }; +static u64 GetGpuClock64() { + auto now = std::chrono::high_resolution_clock::now(); + auto duration = now.time_since_epoch(); + auto ticks = std::chrono::duration_cast(duration).count(); + return static_cast(ticks); +} + struct PM4CmdEventWriteEop { PM4Type3Header header; union { @@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop { *Address() = DataQWord(); break; } + case DataSelect::GpuClock64: { + *Address() = GetGpuClock64(); + break; + } case DataSelect::PerfCounter: { *Address() = Common::FencedRDTSC(); break; @@ -404,8 +415,9 @@ struct PM4CmdWaitRegMem { u32 mask; u32 poll_interval; - u32* Address() const { - return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); + template + T Address() const { + return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); } bool Test() const { @@ -464,8 +476,8 @@ struct PM4CmdWriteData { } template - T* Address() const { - return reinterpret_cast(addr64); + T Address() const { + return reinterpret_cast(addr64); } }; @@ -494,8 +506,9 @@ struct PM4CmdEventWriteEos { BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS }; - u32* Address() const { - return reinterpret_cast(address_lo | u64(address_hi) << 32); + template + T Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); } u32 DataDWord() const { @@ -660,6 +673,10 @@ struct PM4CmdReleaseMem { *Address() = DataQWord(); break; } + case DataSelect::GpuClock64: { + *Address() = GetGpuClock64(); + break; + } case DataSelect::PerfCounter: { *Address() = Common::FencedRDTSC(); break; diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 8922c4ea3..fba0cbb9f 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 { CondIndirectBuffer = 0x3F, CopyData = 0x40, CommandProcessorDma = 0x41, + PfpSyncMe = 0x42, SurfaceSync = 0x43, CondWrite = 0x45, EventWrite = 0x46, diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 6ab3306b4..ef5bf1b66 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -75,7 +75,7 @@ struct Buffer { static_assert(sizeof(Buffer) == 16); // 128bits enum class ImageType : u64 { - Buffer = 0, + Invalid = 0, Color1D = 8, Color2D = 9, Color3D = 10, @@ -88,8 +88,8 @@ enum class ImageType : u64 { constexpr std::string_view NameOf(ImageType type) { switch (type) { - case ImageType::Buffer: - return "Buffer"; + case ImageType::Invalid: + return "Invalid"; case ImageType::Color1D: return "Color1D"; case ImageType::Color2D: @@ -179,6 +179,40 @@ struct Image { return base_address << 8; } + u32 DstSelect() const { + return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); + } + + static char SelectComp(u32 sel) { + switch (sel) { + case 0: + return '0'; + case 1: + return '1'; + case 4: + return 'R'; + case 5: + return 'G'; + case 6: + return 'B'; + case 7: + return 'A'; + default: + UNREACHABLE(); + } + } + + std::string DstSelectName() const { + std::string result = "["; + u32 dst_sel = DstSelect(); + for (u32 i = 0; i < 4; i++) { + result += SelectComp(dst_sel & 7); + dst_sel >>= 3; + } + result += ']'; + return result; + } + u32 Pitch() const { return pitch + 1; } @@ -290,6 +324,7 @@ enum class BorderColor : u64 { // Table 8.12 Sampler Resource Definition struct Sampler { union { + u64 raw0; BitField<0, 3, ClampMode> clamp_x; BitField<3, 3, ClampMode> clamp_y; BitField<6, 3, ClampMode> clamp_z; @@ -309,6 +344,7 @@ struct Sampler { BitField<60, 4, u64> perf_z; }; union { + u64 raw1; BitField<0, 14, u64> lod_bias; BitField<14, 6, u64> lod_bias_sec; BitField<20, 2, Filter> xy_mag_filter; @@ -323,6 +359,14 @@ struct Sampler { BitField<62, 2, BorderColor> border_color_type; }; + operator bool() const noexcept { + return raw0 != 0 || raw1 != 0; + } + + bool operator==(const Sampler& other) const noexcept { + return std::memcmp(this, &other, sizeof(Sampler)) == 0; + } + float LodBias() const noexcept { return static_cast(static_cast((lod_bias.Value() ^ 0x2000u) - 0x2000u)) / 256.0f; diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp new file mode 100644 index 000000000..d112864d5 --- /dev/null +++ b/src/video_core/buffer_cache/buffer.cpp @@ -0,0 +1,223 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/buffer_cache/buffer.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +#include + +namespace VideoCore { + +constexpr vk::BufferUsageFlags AllFlags = + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer; + +std::string_view BufferTypeName(MemoryUsage type) { + switch (type) { + case MemoryUsage::Upload: + return "Upload"; + case MemoryUsage::Download: + return "Download"; + case MemoryUsage::Stream: + return "Stream"; + case MemoryUsage::DeviceLocal: + return "DeviceLocal"; + default: + return "Invalid"; + } +} + +[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) { + return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + : VkMemoryPropertyFlagBits{}; +} + +[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::Upload: + case MemoryUsage::Stream: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + case MemoryUsage::Download: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + case MemoryUsage::DeviceLocal: + return {}; + } + return {}; +} + +[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + case MemoryUsage::Stream: + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + case MemoryUsage::Upload: + case MemoryUsage::Download: + return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + } + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; +} + +UniqueBuffer::UniqueBuffer(vk::Device device_, VmaAllocator allocator_) + : device{device_}, allocator{allocator_} {} + +UniqueBuffer::~UniqueBuffer() { + if (buffer) { + vmaDestroyBuffer(allocator, buffer, allocation); + } +} + +void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usage, + VmaAllocationInfo* out_alloc_info) { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), + .usage = MemoryUsageVma(usage), + .requiredFlags = 0, + .preferredFlags = MemoryUsagePreferredVmaFlags(usage), + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + const VkBufferCreateInfo buffer_ci_unsafe = static_cast(buffer_ci); + VkBuffer unsafe_buffer{}; + VkResult result = vmaCreateBuffer(allocator, &buffer_ci_unsafe, &alloc_ci, &unsafe_buffer, + &allocation, out_alloc_info); + ASSERT_MSG(result == VK_SUCCESS, "Failed allocating buffer with error {}", + vk::to_string(vk::Result{result})); + buffer = vk::Buffer{unsafe_buffer}; +} + +Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_, + u64 size_bytes_) + : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_}, + buffer{instance->GetDevice(), instance->GetAllocator()} { + // Create buffer object. + const vk::BufferCreateInfo buffer_ci = { + .size = size_bytes, + .usage = AllFlags, + }; + VmaAllocationInfo alloc_info{}; + buffer.Create(buffer_ci, usage, &alloc_info); + + const auto device = instance->GetDevice(); + Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes); + + // Map it if it is host visible. + VkMemoryPropertyFlags property_flags{}; + vmaGetAllocationMemoryProperties(instance->GetAllocator(), buffer.allocation, &property_flags); + if (alloc_info.pMappedData) { + mapped_data = std::span{std::bit_cast(alloc_info.pMappedData), size_bytes}; + } + is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +} + +vk::BufferView Buffer::View(u32 offset, u32 size, AmdGpu::DataFormat dfmt, + AmdGpu::NumberFormat nfmt) { + const auto it{std::ranges::find_if(views, [offset, size, dfmt, nfmt](const BufferView& view) { + return offset == view.offset && size == view.size && dfmt == view.dfmt && nfmt == view.nfmt; + })}; + if (it != views.end()) { + return it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .dfmt = dfmt, + .nfmt = nfmt, + .handle = instance->GetDevice().createBufferView({ + .buffer = buffer.buffer, + .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt), + .offset = offset, + .range = size, + }), + }); + return views.back().handle; +} + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_, + MemoryUsage usage, u64 size_bytes) + : Buffer{instance, usage, 0, size_bytes}, scheduler{scheduler_} { + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); + const auto device = instance.GetDevice(); + Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage), + size_bytes); +} + +std::pair StreamBuffer::Map(u64 size, u64 alignment) { + if (!is_coherent && usage == MemoryUsage::Stream) { + size = Common::AlignUp(size, instance->NonCoherentAtomSize()); + } + + ASSERT(size <= this->size_bytes); + mapped_size = size; + + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + if (offset + size > this->size_bytes) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; + offset = 0; + + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; + } + + const u64 mapped_upper_bound = offset + size; + WaitPendingOperations(mapped_upper_bound); + return std::make_pair(mapped_data.data() + offset, offset); +} + +void StreamBuffer::Commit() { + if (!is_coherent) { + if (usage == MemoryUsage::Download) { + vmaInvalidateAllocation(instance->GetAllocator(), buffer.allocation, offset, + mapped_size); + } else { + vmaFlushAllocation(instance->GetAllocator(), buffer.allocation, offset, mapped_size); + } + } + + offset += mapped_size; + if (current_watch_cursor + 1 >= current_watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); + } + + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.tick = scheduler.CurrentTick(); +} + +void StreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + scheduler.Wait(watch.tick); + ++wait_cursor; + } +} + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h new file mode 100644 index 000000000..d373fbffb --- /dev/null +++ b/src/video_core/buffer_cache/buffer.h @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/types.h" +#include "video_core/amdgpu/resource.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { +class Instance; +class Scheduler; +} // namespace Vulkan + +VK_DEFINE_HANDLE(VmaAllocation) +VK_DEFINE_HANDLE(VmaAllocator) + +struct VmaAllocationInfo; + +namespace VideoCore { + +/// Hints and requirements for the backing memory type of a commit +enum class MemoryUsage { + DeviceLocal, ///< Requests device local buffer. + Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads + Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks + Stream, ///< Requests device local host visible buffer, falling back host memory. +}; + +struct UniqueBuffer { + explicit UniqueBuffer(vk::Device device, VmaAllocator allocator); + ~UniqueBuffer(); + + UniqueBuffer(const UniqueBuffer&) = delete; + UniqueBuffer& operator=(const UniqueBuffer&) = delete; + + UniqueBuffer(UniqueBuffer&& other) + : buffer{std::exchange(other.buffer, VK_NULL_HANDLE)}, + allocator{std::exchange(other.allocator, VK_NULL_HANDLE)}, + allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {} + UniqueBuffer& operator=(UniqueBuffer&& other) { + buffer = std::exchange(other.buffer, VK_NULL_HANDLE); + allocator = std::exchange(other.allocator, VK_NULL_HANDLE); + allocation = std::exchange(other.allocation, VK_NULL_HANDLE); + return *this; + } + + void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage, + VmaAllocationInfo* out_alloc_info); + + operator vk::Buffer() const { + return buffer; + } + + vk::Device device; + VmaAllocator allocator; + VmaAllocation allocation; + vk::Buffer buffer{}; +}; + +class Buffer { +public: + explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_, + u64 size_bytes_); + + Buffer& operator=(const Buffer&) = delete; + Buffer(const Buffer&) = delete; + + Buffer& operator=(Buffer&&) = default; + Buffer(Buffer&&) = default; + + vk::BufferView View(u32 offset, u32 size, AmdGpu::DataFormat dfmt, AmdGpu::NumberFormat nfmt); + + /// Increases the likeliness of this being a stream buffer + void IncreaseStreamScore(int score) noexcept { + stream_score += score; + } + + /// Returns the likeliness of this being a stream buffer + [[nodiscard]] int StreamScore() const noexcept { + return stream_score; + } + + /// Returns true when vaddr -> vaddr+size is fully contained in the buffer + [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { + return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); + } + + /// Returns the base CPU address of the buffer + [[nodiscard]] VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + /// Returns the offset relative to the given CPU address + [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { + return static_cast(other_cpu_addr - cpu_addr); + } + + size_t SizeBytes() const { + return size_bytes; + } + + vk::Buffer Handle() const noexcept { + return buffer; + } + +public: + VAddr cpu_addr = 0; + bool is_picked{}; + bool is_coherent{}; + int stream_score = 0; + size_t size_bytes = 0; + std::span mapped_data; + const Vulkan::Instance* instance{}; + MemoryUsage usage; + UniqueBuffer buffer; + struct BufferView { + u32 offset; + u32 size; + AmdGpu::DataFormat dfmt; + AmdGpu::NumberFormat nfmt; + vk::BufferView handle; + }; + std::vector views; +}; + +class StreamBuffer : public Buffer { +public: + explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + MemoryUsage usage, u64 size_bytes_); + + /// Reserves a region of memory from the stream buffer. + std::pair Map(u64 size, u64 alignment = 0); + + /// Ensures that reserved bytes of memory are available to the GPU. + void Commit(); + + /// Maps and commits a memory region with user provided data + u64 Copy(VAddr src, size_t size, size_t alignment = 0) { + const auto [data, offset] = Map(size, alignment); + std::memcpy(data, reinterpret_cast(src), size); + Commit(); + return offset; + } + + u64 GetFreeSize() const { + return size_bytes - offset - mapped_size; + } + +private: + struct Watch { + u64 tick{}; + u64 upper_bound{}; + }; + + /// Increases the amount of watches available. + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + /// Waits pending watches until requested upper bound. + void WaitPendingOperations(u64 requested_upper_bound); + +private: + Vulkan::Scheduler& scheduler; + u64 offset{}; + u64 mapped_size{}; + std::vector current_watches; + std::size_t current_watch_cursor{}; + std::optional invalidation_mark; + std::vector previous_watches; + std::size_t wait_cursor{}; + u64 wait_bound{}; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp new file mode 100644 index 000000000..2246807ad --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -0,0 +1,538 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "common/alignment.h" +#include "common/scope_exit.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace VideoCore { + +static constexpr size_t StagingBufferSize = 256_MB; +static constexpr size_t UboStreamBufferSize = 64_MB; + +BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, + const AmdGpu::Liverpool* liverpool_, PageManager& tracker_) + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, tracker{tracker_}, + staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, + stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, + memory_tracker{&tracker} { + // Ensure the first slot is used for the null buffer + void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, 1)); +} + +BufferCache::~BufferCache() = default; + +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { + std::scoped_lock lk{mutex}; + const bool is_tracked = IsRegionRegistered(device_addr, size); + if (!is_tracked) { + return; + } + // Mark the page as CPU modified to stop tracking writes. + SCOPE_EXIT { + memory_tracker.MarkRegionAsCpuModified(device_addr, size); + }; + if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { + // Page has not been modified by the GPU, nothing to do. + return; + } +} + +void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { + boost::container::small_vector copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + memory_tracker.ForEachDownloadRange( + device_addr, size, [&](u64 device_addr_out, u64 range_size) { + const VAddr buffer_addr = buffer.CpuAddr(); + const auto add_download = [&](VAddr start, VAddr end, u64) { + const u64 new_offset = start - buffer_addr; + const u64 new_size = end - start; + copies.push_back(vk::BufferCopy{ + .srcOffset = new_offset, + .dstOffset = total_size_bytes, + .size = new_size, + }); + // Align up to avoid cache conflicts + constexpr u64 align = 64ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; + largest_copy = std::max(largest_copy, new_size); + }; + }); + if (total_size_bytes == 0) { + return; + } + const auto [staging, offset] = staging_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dstOffset += offset; + } + staging_buffer.Commit(); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies); + scheduler.Finish(); + for (const auto& copy : copies) { + const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; + const u64 dst_offset = copy.dstOffset - offset; + std::memcpy(std::bit_cast(copy_device_addr), staging + dst_offset, copy.size); + } +} + +bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { + boost::container::small_vector attributes; + boost::container::small_vector bindings; + SCOPE_EXIT { + if (instance.IsVertexInputDynamicState()) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setVertexInputEXT(bindings, attributes); + } + }; + + if (vs_info.vs_inputs.empty()) { + return false; + } + + std::array host_buffers; + std::array host_offsets; + boost::container::static_vector guest_buffers; + + struct BufferRange { + VAddr base_address; + VAddr end_address; + vk::Buffer vk_buffer; + u64 offset; + + size_t GetSize() const { + return end_address - base_address; + } + }; + + // Calculate buffers memory overlaps + bool has_step_rate = false; + boost::container::static_vector ranges{}; + for (const auto& input : vs_info.vs_inputs) { + if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + has_step_rate = true; + continue; + } + + const auto& buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + if (buffer.GetSize() == 0) { + continue; + } + guest_buffers.emplace_back(buffer); + ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = + Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = buffer.GetStride(), + .inputRate = input.instance_step_rate == Shader::Info::VsInput::None + ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, + .divisor = 1, + }); + } + + std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { + return lhv.base_address < rhv.base_address; + }); + + boost::container::static_vector ranges_merged{ranges[0]}; + for (auto range : ranges) { + auto& prev_range = ranges_merged.back(); + if (prev_range.end_address < range.base_address) { + ranges_merged.emplace_back(range); + } else { + prev_range.end_address = std::max(prev_range.end_address, range.end_address); + } + } + + // Map buffers + for (auto& range : ranges_merged) { + const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false); + range.vk_buffer = buffer->buffer; + range.offset = offset; + } + + // Bind vertex buffers + const size_t num_buffers = guest_buffers.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& buffer = guest_buffers[i]; + const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) { + return (buffer.base_address >= range.base_address && + buffer.base_address < range.end_address); + }); + ASSERT(host_buffer != ranges_merged.cend()); + + host_buffers[i] = host_buffer->vk_buffer; + host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; + } + + if (num_buffers > 0) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); + } + + return has_step_rate; +} + +u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { + // Emulate QuadList primitive type with CPU made index buffer. + const auto& regs = liverpool->regs; + if (regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::QuadList) { + is_indexed = true; + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset] = stream_buffer.Map(index_size); + Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + stream_buffer.Commit(); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + if (!is_indexed) { + return regs.num_indices; + } + + // Figure out index type and size. + const bool is_index16 = + regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; + const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; + const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); + VAddr index_address = regs.index_base_address.Address(); + index_address += index_offset * index_size; + + // Bind index buffer. + const u32 index_buffer_size = regs.num_indices * index_size; + const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); + return regs.num_indices; +} + +std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written) { + std::scoped_lock lk{mutex}; + static constexpr u64 StreamThreshold = CACHING_PAGESIZE; + const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); + if (!is_written && size < StreamThreshold && !is_gpu_dirty) { + // For small uniform buffers that have not been modified by gpu + // use device local stream buffer to reduce renderpass breaks. + const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); + return {&stream_buffer, offset}; + } + + const BufferId buffer_id = FindBuffer(device_addr, size); + Buffer& buffer = slot_buffers[buffer_id]; + SynchronizeBuffer(buffer, device_addr, size); + if (is_written) { + memory_tracker.MarkRegionAsGpuModified(device_addr, size); + } + return {&buffer, buffer.Offset(device_addr)}; +} + +std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { + const u64 page = gpu_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (buffer_id) { + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(gpu_addr, size)) { + return {&buffer, buffer.Offset(gpu_addr)}; + } + } + const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); + return {&staging_buffer, offset}; +} + +bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { + const VAddr end_addr = addr + size; + const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); + for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[buffer_id]; + const VAddr buf_start_addr = buffer.CpuAddr(); + const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); + if (buf_start_addr < end_addr && addr < buf_end_addr) { + return true; + } + page = Common::DivCeil(end_addr, CACHING_PAGESIZE); + } + return false; +} + +bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionCpuModified(addr, size); +} + +bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionGpuModified(addr, size); +} + +BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { + if (device_addr == 0) { + return NULL_BUFFER_ID; + } + const u64 page = device_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + return CreateBuffer(device_addr, size); + } + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(device_addr, size)) { + return buffer_id; + } + return CreateBuffer(device_addr, size); +} + +BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) { + static constexpr int STREAM_LEAP_THRESHOLD = 16; + boost::container::small_vector overlap_ids; + VAddr begin = device_addr; + VAddr end = device_addr + wanted_size; + int stream_score = 0; + bool has_stream_leap = false; + const auto expand_begin = [&](VAddr add_value) { + static constexpr VAddr min_page = CACHING_PAGESIZE + DEVICE_PAGESIZE; + if (add_value > begin - min_page) { + begin = min_page; + device_addr = DEVICE_PAGESIZE; + return; + } + begin -= add_value; + device_addr = begin - CACHING_PAGESIZE; + }; + const auto expand_end = [&](VAddr add_value) { + static constexpr VAddr max_page = 1ULL << MemoryTracker::MAX_CPU_PAGE_BITS; + if (add_value > max_page - end) { + end = max_page; + return; + } + end += add_value; + }; + if (begin == 0) { + return OverlapResult{ + .ids = std::move(overlap_ids), + .begin = begin, + .end = end, + .has_stream_leap = has_stream_leap, + }; + } + for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); + device_addr += CACHING_PAGESIZE) { + const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; + if (!overlap_id) { + continue; + } + Buffer& overlap = slot_buffers[overlap_id]; + if (overlap.is_picked) { + continue; + } + overlap_ids.push_back(overlap_id); + overlap.is_picked = true; + const VAddr overlap_device_addr = overlap.CpuAddr(); + const bool expands_left = overlap_device_addr < begin; + if (expands_left) { + begin = overlap_device_addr; + } + const VAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); + const bool expands_right = overlap_end > end; + if (overlap_end > end) { + end = overlap_end; + } + stream_score += overlap.StreamScore(); + if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { + // When this memory region has been joined a bunch of times, we assume it's being used + // as a stream buffer. Increase the size to skip constantly recreating buffers. + has_stream_leap = true; + if (expands_right) { + expand_begin(CACHING_PAGESIZE * 128); + } + if (expands_left) { + expand_end(CACHING_PAGESIZE * 128); + } + } + } + return OverlapResult{ + .ids = std::move(overlap_ids), + .begin = begin, + .end = end, + .has_stream_leap = has_stream_leap, + }; +} + +void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, + bool accumulate_stream_score) { + Buffer& new_buffer = slot_buffers[new_buffer_id]; + Buffer& overlap = slot_buffers[overlap_id]; + if (accumulate_stream_score) { + new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); + } + const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); + const vk::BufferCopy copy = { + .srcOffset = 0, + .dstOffset = dst_base_offset, + .size = overlap.SizeBytes(), + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + READ_BARRIER, {}, {}); + cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + DeleteBuffer(overlap_id, true); +} + +BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { + const VAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); + device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); + wanted_size = static_cast(device_addr_end - device_addr); + const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); + const u32 size = static_cast(overlap.end - overlap.begin); + const BufferId new_buffer_id = + slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, size); + auto& new_buffer = slot_buffers[new_buffer_id]; + const size_t size_bytes = new_buffer.SizeBytes(); + const auto cmdbuf = scheduler.CommandBuffer(); + scheduler.EndRendering(); + cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0); + for (const BufferId overlap_id : overlap.ids) { + JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); + } + Register(new_buffer_id); + return new_buffer_id; +} + +void BufferCache::Register(BufferId buffer_id) { + ChangeRegister(buffer_id); +} + +void BufferCache::Unregister(BufferId buffer_id) { + ChangeRegister(buffer_id); +} + +template +void BufferCache::ChangeRegister(BufferId buffer_id) { + Buffer& buffer = slot_buffers[buffer_id]; + const auto size = buffer.SizeBytes(); + const VAddr device_addr_begin = buffer.CpuAddr(); + const VAddr device_addr_end = device_addr_begin + size; + const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; + const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); + for (u64 page = page_begin; page != page_end; ++page) { + if constexpr (insert) { + page_table[page] = buffer_id; + } else { + page_table[page] = BufferId{}; + } + } +} + +bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) { + boost::container::small_vector copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + VAddr buffer_start = buffer.CpuAddr(); + const auto add_copy = [&](VAddr device_addr_out, u64 range_size) { + copies.push_back(vk::BufferCopy{ + .srcOffset = total_size_bytes, + .dstOffset = device_addr_out - buffer_start, + .size = range_size, + }); + total_size_bytes += range_size; + largest_copy = std::max(largest_copy, range_size); + }; + memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { + add_copy(device_addr_out, range_size); + // Prevent uploading to gpu modified regions. + // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy); + }); + if (total_size_bytes == 0) { + return true; + } + vk::Buffer src_buffer = staging_buffer.Handle(); + if (total_size_bytes < StagingBufferSize) { + const auto [staging, offset] = staging_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + // Apply the staging offset + copy.srcOffset += offset; + } + staging_buffer.Commit(); + } else { + // For large one time transfers use a temporary host buffer. + // RenderDoc can lag quite a bit if the stream buffer is too large. + Buffer temp_buffer{instance, MemoryUsage::Upload, 0, total_size_bytes}; + src_buffer = temp_buffer.Handle(); + u8* const staging = temp_buffer.mapped_data.data(); + for (auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + } + scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {}); + } + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + READ_BARRIER, {}, {}); + cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + return false; +} + +void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { + // Mark the whole buffer as CPU written to stop tracking CPU writes + if (!do_not_mark) { + Buffer& buffer = slot_buffers[buffer_id]; + memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); + } + Unregister(buffer_id); + scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); }); +} + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..33ea3f86b --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include "common/div_ceil.h" +#include "common/slot_vector.h" +#include "common/types.h" +#include "video_core/buffer_cache/buffer.h" +#include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/multi_level_page_table.h" + +namespace AmdGpu { +struct Liverpool; +} + +namespace Shader { +struct Info; +} + +namespace VideoCore { + +using BufferId = Common::SlotId; + +static constexpr BufferId NULL_BUFFER_ID{0}; + +static constexpr u32 NUM_VERTEX_BUFFERS = 32; + +class BufferCache { +public: + static constexpr u32 CACHING_PAGEBITS = 12; + static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; + static constexpr u64 DEVICE_PAGESIZE = 4_KB; + + struct Traits { + using Entry = BufferId; + static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t FirstLevelBits = 14; + static constexpr size_t PageBits = CACHING_PAGEBITS; + }; + using PageTable = MultiLevelPageTable; + + struct OverlapResult { + boost::container::small_vector ids; + VAddr begin; + VAddr end; + bool has_stream_leap = false; + }; + +public: + explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + const AmdGpu::Liverpool* liverpool, PageManager& tracker); + ~BufferCache(); + + /// Invalidates any buffer in the logical page range. + void InvalidateMemory(VAddr device_addr, u64 size); + + /// Binds host vertex buffers for the current draw. + bool BindVertexBuffers(const Shader::Info& vs_info); + + /// Bind host index buffer for the current draw. + u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + + /// Obtains a buffer for the specified region. + [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written); + + /// Obtains a temporary buffer for usage in texture cache. + [[nodiscard]] std::pair ObtainTempBuffer(VAddr gpu_addr, u32 size); + + /// Return true when a region is registered on the cache + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); + + /// Return true when a CPU region is modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + +private: + template + void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { + const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); + for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[buffer_id]; + func(buffer_id, buffer); + + const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); + page = Common::DivCeil(end_addr, CACHING_PAGESIZE); + } + } + + void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); + + [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); + + [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); + + void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); + + [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size); + + void Register(BufferId buffer_id); + + void Unregister(BufferId buffer_id); + + template + void ChangeRegister(BufferId buffer_id); + + bool SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size); + + void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); + + const Vulkan::Instance& instance; + Vulkan::Scheduler& scheduler; + const AmdGpu::Liverpool* liverpool; + PageManager& tracker; + StreamBuffer staging_buffer; + StreamBuffer stream_buffer; + std::recursive_mutex mutex; + Common::SlotVector slot_buffers; + MemoryTracker memory_tracker; + PageTable page_table; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h new file mode 100644 index 000000000..375701c4c --- /dev/null +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -0,0 +1,175 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "common/types.h" +#include "video_core/buffer_cache/word_manager.h" + +namespace VideoCore { + +class MemoryTracker { +public: + static constexpr size_t MAX_CPU_PAGE_BITS = 39; + static constexpr size_t HIGHER_PAGE_BITS = 22; + static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; + static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; + static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr size_t MANAGER_POOL_SIZE = 32; + static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + using Manager = WordManager; + +public: + explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} + ~MemoryTracker() = default; + + /// Returns true if a region has been modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages( + query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + return manager->template IsRegionModified(offset, size); + }); + } + + /// Returns true if a region has been modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages( + query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + return manager->template IsRegionModified(offset, size); + }); + } + + /// Mark region as CPU modified, notifying the device_tracker about this change + void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as CPU modified, notifying the device_tracker about this change + void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Mark region as modified from the host GPU + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as modified from the host GPU + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + }); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + if constexpr (clear) { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + } else { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + } + }); + } + +private: + /** + * @brief IteratePages Iterates L2 word manager page table. + * @param cpu_address Start byte cpu address + * @param size Size in bytes of the region of iterate. + * @param func Callback for each word manager. + * @return + */ + template + bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + std::size_t remaining_size{size}; + std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + auto* manager{top_tier[page_index]}; + if (manager) { + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } else if constexpr (create_region_on_fail) { + CreateRegion(page_index); + manager = top_tier[page_index]; + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } + page_index++; + page_offset = 0; + remaining_size -= copy_amount; + } + return false; + } + + void CreateRegion(std::size_t page_index) { + const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; + if (free_managers.empty()) { + manager_pool.emplace_back(); + auto& last_pool = manager_pool.back(); + for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { + std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); + free_managers.push_back(&last_pool[i]); + } + } + // Each manager tracks a 4_MB virtual address space. + auto* new_manager = free_managers.back(); + new_manager->SetCpuAddress(base_cpu_addr); + free_managers.pop_back(); + top_tier[page_index] = new_manager; + } + + PageManager* tracker; + std::deque> manager_pool; + std::vector free_managers; + std::array top_tier{}; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/range_set.h b/src/video_core/buffer_cache/range_set.h new file mode 100644 index 000000000..fe54aff8f --- /dev/null +++ b/src/video_core/buffer_cache/range_set.h @@ -0,0 +1,159 @@ +// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "common/types.h" + +namespace VideoCore { + +template +using RangeSetsAllocator = + boost::fast_pool_allocator; + +struct RangeSet { + using IntervalSet = + boost::icl::interval_set; + using IntervalType = typename IntervalSet::interval_type; + + explicit RangeSet() = default; + ~RangeSet() = default; + + void Add(VAddr base_address, size_t size) { + const VAddr end_address = base_address + size; + IntervalType interval{base_address, end_address}; + m_ranges_set.add(interval); + } + + void Subtract(VAddr base_address, size_t size) { + const VAddr end_address = base_address + size; + IntervalType interval{base_address, end_address}; + m_ranges_set.subtract(interval); + } + + template + void ForEach(Func&& func) const { + if (m_ranges_set.empty()) { + return; + } + auto it = m_ranges_set.begin(); + auto end_it = m_ranges_set.end(); + for (; it != end_it; it++) { + const VAddr inter_addr_end = it->upper(); + const VAddr inter_addr = it->lower(); + func(inter_addr, inter_addr_end); + } + } + + template + void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const { + if (m_ranges_set.empty()) { + return; + } + const VAddr start_address = base_addr; + const VAddr end_address = start_address + size; + const IntervalType search_interval{start_address, end_address}; + auto it = m_ranges_set.lower_bound(search_interval); + if (it == m_ranges_set.end()) { + return; + } + auto end_it = m_ranges_set.upper_bound(search_interval); + for (; it != end_it; it++) { + VAddr inter_addr_end = it->upper(); + VAddr inter_addr = it->lower(); + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + func(inter_addr, inter_addr_end); + } + } + + IntervalSet m_ranges_set; +}; + +class RangeMap { +public: + using IntervalMap = + boost::icl::interval_map; + using IntervalType = typename IntervalMap::interval_type; + +public: + RangeMap() = default; + ~RangeMap() = default; + + RangeMap(RangeMap const&) = delete; + RangeMap& operator=(RangeMap const&) = delete; + + RangeMap(RangeMap&& other); + RangeMap& operator=(RangeMap&& other); + + void Add(VAddr base_address, size_t size, u64 value) { + const VAddr end_address = base_address + size; + IntervalType interval{base_address, end_address}; + m_ranges_map.add({interval, value}); + } + + void Subtract(VAddr base_address, size_t size) { + const VAddr end_address = base_address + size; + IntervalType interval{base_address, end_address}; + m_ranges_map -= interval; + } + + template + void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const { + if (m_ranges_map.empty()) { + return; + } + const VAddr start_address = base_addr; + const VAddr end_address = start_address + size; + const IntervalType search_interval{start_address, end_address}; + auto it = m_ranges_map.lower_bound(search_interval); + if (it == m_ranges_map.end()) { + return; + } + auto end_it = m_ranges_map.upper_bound(search_interval); + for (; it != end_it; it++) { + VAddr inter_addr_end = it->first.upper(); + VAddr inter_addr = it->first.lower(); + if (inter_addr_end > end_address) { + inter_addr_end = end_address; + } + if (inter_addr < start_address) { + inter_addr = start_address; + } + func(inter_addr, inter_addr_end, it->second); + } + } + + template + void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const { + const VAddr end_addr = base_addr + size; + ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, u64) { + if (size_t gap_size = range_addr - base_addr; gap_size != 0) { + func(base_addr, gap_size); + } + base_addr = range_end; + }); + if (base_addr != end_addr) { + func(base_addr, end_addr - base_addr); + } + } + +private: + IntervalMap m_ranges_map; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h new file mode 100644 index 000000000..549d2a9ed --- /dev/null +++ b/src/video_core/buffer_cache/word_manager.h @@ -0,0 +1,398 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/div_ceil.h" +#include "common/types.h" +#include "video_core/page_manager.h" + +namespace VideoCore { + +constexpr u64 PAGES_PER_WORD = 64; +constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; + +enum class Type { + CPU, + GPU, + Untracked, +}; + +/// Vector tracking modified pages tightly packed with small vector optimization +template +struct WordsArray { + /// Returns the pointer to the words state + [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { + return is_short ? stack.data() : heap; + } + + /// Returns the pointer to the words state + [[nodiscard]] u64* Pointer(bool is_short) noexcept { + return is_short ? stack.data() : heap; + } + + std::array stack{}; ///< Small buffers storage + u64* heap; ///< Not-small buffers pointer to the storage +}; + +template +struct Words { + explicit Words() = default; + explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { + num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); + if (IsShort()) { + cpu.stack.fill(~u64{0}); + gpu.stack.fill(0); + untracked.stack.fill(~u64{0}); + } else { + // Share allocation between CPU and GPU pages and set their default values + u64* const alloc = new u64[num_words * 3]; + cpu.heap = alloc; + gpu.heap = alloc + num_words; + untracked.heap = alloc + num_words * 2; + std::fill_n(cpu.heap, num_words, ~u64{0}); + std::fill_n(gpu.heap, num_words, 0); + std::fill_n(untracked.heap, num_words, ~u64{0}); + } + // Clean up tailing bits + const u64 last_word_size = size_bytes % BYTES_PER_WORD; + const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); + const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; + const u64 last_word = (~u64{0} << shift) >> shift; + cpu.Pointer(IsShort())[NumWords() - 1] = last_word; + untracked.Pointer(IsShort())[NumWords() - 1] = last_word; + } + + ~Words() { + Release(); + } + + Words& operator=(Words&& rhs) noexcept { + Release(); + size_bytes = rhs.size_bytes; + num_words = rhs.num_words; + cpu = rhs.cpu; + gpu = rhs.gpu; + untracked = rhs.untracked; + rhs.cpu.heap = nullptr; + return *this; + } + + Words(Words&& rhs) noexcept + : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, + untracked{rhs.untracked} { + rhs.cpu.heap = nullptr; + } + + Words& operator=(const Words&) = delete; + Words(const Words&) = delete; + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return num_words <= stack_words; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return num_words; + } + + /// Release buffer resources + void Release() { + if (!IsShort()) { + // CPU written words is the base for the heap allocation + delete[] cpu.heap; + } + } + + template + std::span Span() noexcept { + if constexpr (type == Type::CPU) { + return std::span(cpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::GPU) { + return std::span(gpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::Untracked) { + return std::span(untracked.Pointer(IsShort()), num_words); + } + } + + template + std::span Span() const noexcept { + if constexpr (type == Type::CPU) { + return std::span(cpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::GPU) { + return std::span(gpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::Untracked) { + return std::span(untracked.Pointer(IsShort()), num_words); + } + } + + u64 size_bytes = 0; + size_t num_words = 0; + WordsArray cpu; + WordsArray gpu; + WordsArray untracked; +}; + +template +class WordManager { +public: + explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) + : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} + + explicit WordManager() = default; + + void SetCpuAddress(VAddr new_cpu_addr) { + cpu_addr = new_cpu_addr; + } + + VAddr GetCpuAddr() const { + return cpu_addr; + } + + static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { + constexpr size_t number_bits = sizeof(u64) * 8; + const size_t limit_page_end = number_bits - std::min(page_end, number_bits); + u64 bits = (word >> page_start) << page_start; + bits = (bits << limit_page_end) >> limit_page_end; + return bits; + } + + static std::pair GetWordPage(VAddr address) { + const size_t converted_address = static_cast(address); + const size_t word_number = converted_address / BYTES_PER_WORD; + const size_t amount_pages = converted_address % BYTES_PER_WORD; + return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); + } + + template + void IterateWords(size_t offset, size_t size, Func&& func) const { + using FuncReturn = std::invoke_result_t; + static constexpr bool BOOL_BREAK = std::is_same_v; + const size_t start = static_cast(std::max(static_cast(offset), 0LL)); + const size_t end = static_cast(std::max(static_cast(offset + size), 0LL)); + if (start >= SizeBytes() || end <= start) { + return; + } + auto [start_word, start_page] = GetWordPage(start); + auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); + const size_t num_words = NumWords(); + start_word = std::min(start_word, num_words); + end_word = std::min(end_word, num_words); + const size_t diff = end_word - start_word; + end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; + end_word = std::min(end_word, num_words); + end_page += diff * PAGES_PER_WORD; + constexpr u64 base_mask{~0ULL}; + for (size_t word_index = start_word; word_index < end_word; word_index++) { + const u64 mask = ExtractBits(base_mask, start_page, end_page); + start_page = 0; + end_page -= PAGES_PER_WORD; + if constexpr (BOOL_BREAK) { + if (func(word_index, mask)) { + return; + } + } else { + func(word_index, mask); + } + } + } + + template + void IteratePages(u64 mask, Func&& func) const { + size_t offset = 0; + while (mask != 0) { + const size_t empty_bits = std::countr_zero(mask); + offset += empty_bits; + mask = mask >> empty_bits; + + const size_t continuous_bits = std::countr_one(mask); + func(offset, continuous_bits); + mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; + offset += continuous_bits; + } + } + + /** + * Change the state of a range of pages + * + * @param dirty_addr Base address to mark or unmark as modified + * @param size Size in bytes to mark or unmark as modified + */ + template + void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { + std::span state_words = words.template Span(); + [[maybe_unused]] std::span untracked_words = words.template Span(); + IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::CPU) { + NotifyPageTracker(index, untracked_words[index], mask); + } + if constexpr (enable) { + state_words[index] |= mask; + if constexpr (type == Type::CPU) { + untracked_words[index] |= mask; + } + } else { + state_words[index] &= ~mask; + if constexpr (type == Type::CPU) { + untracked_words[index] &= ~mask; + } + } + }); + } + + /** + * Loop over each page in the given range, turn off those bits and notify the tracker if + * needed. Call the given function on each turned off range. + * + * @param query_cpu_range Base CPU address to loop over + * @param size Size in bytes of the CPU range to loop over + * @param func Function to call for each turned off region + */ + template + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + static_assert(type != Type::Untracked); + + std::span state_words = words.template Span(); + [[maybe_unused]] std::span untracked_words = words.template Span(); + const size_t offset = query_cpu_range - cpu_addr; + bool pending = false; + size_t pending_offset{}; + size_t pending_pointer{}; + const auto release = [&]() { + func(cpu_addr + pending_offset * BYTES_PER_PAGE, + (pending_pointer - pending_offset) * BYTES_PER_PAGE); + }; + IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } + const u64 word = state_words[index] & mask; + if constexpr (clear) { + if constexpr (type == Type::CPU) { + NotifyPageTracker(index, untracked_words[index], mask); + } + state_words[index] &= ~mask; + if constexpr (type == Type::CPU) { + untracked_words[index] &= ~mask; + } + } + const size_t base_offset = index * PAGES_PER_WORD; + IteratePages(word, [&](size_t pages_offset, size_t pages_size) { + const auto reset = [&]() { + pending_offset = base_offset + pages_offset; + pending_pointer = base_offset + pages_offset + pages_size; + }; + if (!pending) { + reset(); + pending = true; + return; + } + if (pending_pointer == base_offset + pages_offset) { + pending_pointer += pages_size; + return; + } + release(); + reset(); + }); + }); + if (pending) { + release(); + } + } + + /** + * Returns true when a region has been modified + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + */ + template + [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { + static_assert(type != Type::Untracked); + + const std::span state_words = words.template Span(); + [[maybe_unused]] const std::span untracked_words = + words.template Span(); + bool result = false; + IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } + const u64 word = state_words[index] & mask; + if (word != 0) { + result = true; + return true; + } + return false; + }); + return result; + } + + /// Returns the number of words of the manager + [[nodiscard]] size_t NumWords() const noexcept { + return words.NumWords(); + } + + /// Returns the size in bytes of the manager + [[nodiscard]] u64 SizeBytes() const noexcept { + return words.size_bytes; + } + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return words.IsShort(); + } + +private: + template + u64* Array() noexcept { + if constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + + template + const u64* Array() const noexcept { + if constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + + /** + * Notify tracker about changes in the CPU tracking state of a word in the buffer + * + * @param word_index Index to the word to notify to the tracker + * @param current_bits Current state of the word + * @param new_bits New state of the word + * + * @tparam add_to_tracker True when the tracker should start tracking the new pages + */ + template + void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { + u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; + VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; + IteratePages(changed_bits, [&](size_t offset, size_t size) { + tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, + add_to_tracker ? 1 : -1); + }); + } + + PageManager* tracker; + VAddr cpu_addr = 0; + Words words; +}; + +} // namespace VideoCore diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index f3e84c753..fecea109b 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(r32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -31,20 +35,22 @@ uint rmort[16] = { #define TEXELS_PER_ELEMENT (1) void main() { + uint tile_base = gl_GlobalInvocationID.x - gl_LocalInvocationID.x; // WG*16 + uint p0 = in_data[gl_GlobalInvocationID.x]; uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint p0 = in_data[gl_GlobalInvocationID.x]; + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0; + } - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - imageStore(output_img, img_pos, uvec4(p0, 0, 0, 0)); -} \ No newline at end of file + uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM; + out_data[dw_ofs_x + dw_ofs_y] = p0; +} diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index 2853f8b78..c2caa62c2 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -30,19 +34,25 @@ uint rmort[16] = { #define MICRO_TILE_DIM (8) void main() { + uint block_ofs = 2 * gl_GlobalInvocationID.x; + uint p0 = in_data[block_ofs + 0]; + uint p1 = in_data[block_ofs + 1]; + uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint block_ofs = 2 * gl_GlobalInvocationID.x; - uint p0 = in_data[block_ofs + 0]; - uint p1 = in_data[block_ofs + 1]; + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 8) >= info.sizes[m] ? 1 : 0; + } - uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4 - ivec2 img_pos = MICRO_TILE_DIM * ivec2( - gl_WorkGroupID.x % tiles_per_pitch, - gl_WorkGroupID.x / tiles_per_pitch - ); - imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0)); -} \ No newline at end of file + uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1) * 2; + uint target_tile_x = 2 * gl_WorkGroupID.x % tiles_per_pitch; + uint target_tile_y = 2 * gl_WorkGroupID.x / tiles_per_pitch; + uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col * 2; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM; + out_data[dw_ofs_x + dw_ofs_y] = p0; + out_data[dw_ofs_x + dw_ofs_y + 1] = p1; +} diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index 64f34e6fa..113538706 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ b/src/video_core/host_shaders/detile_m32x4.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rgba32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -30,21 +34,29 @@ uint rmort[16] = { #define MICRO_TILE_DIM (8) void main() { - uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); - uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; - uint col = bitfieldExtract(packed_pos, 4, 4); - uint row = bitfieldExtract(packed_pos, 0, 4); - uint block_ofs = 4 * gl_GlobalInvocationID.x; uint p0 = in_data[block_ofs + 0]; uint p1 = in_data[block_ofs + 1]; uint p2 = in_data[block_ofs + 2]; uint p3 = in_data[block_ofs + 3]; - uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4 - ivec2 img_pos = MICRO_TILE_DIM * ivec2( - gl_WorkGroupID.x % tiles_per_pitch, - gl_WorkGroupID.x / tiles_per_pitch - ); - imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, p2, p3)); -} \ No newline at end of file + uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); + uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; + uint col = bitfieldExtract(packed_pos, 4, 4); + uint row = bitfieldExtract(packed_pos, 0, 4); + + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 16) >= info.sizes[m] ? 1 : 0; + } + + uint tiles_per_pitch = max(((info.pitch >> mip) / MICRO_TILE_DIM), 1u) * 4; + uint target_tile_x = 4 * gl_WorkGroupID.x % tiles_per_pitch; + uint target_tile_y = 4 * gl_WorkGroupID.x / tiles_per_pitch; + uint dw_ofs_x = (target_tile_x * MICRO_TILE_DIM) + 4 * col; + uint dw_ofs_y = ((target_tile_y * tiles_per_pitch) * 64u) + ((row * tiles_per_pitch) * MICRO_TILE_DIM); + out_data[dw_ofs_x + dw_ofs_y] = p0; + out_data[dw_ofs_x + dw_ofs_y + 1] = p1; + out_data[dw_ofs_x + dw_ofs_y + 2] = p2; + out_data[dw_ofs_x + dw_ofs_y + 3] = p3; +} diff --git a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index b4d920e68..5ec48fae2 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -11,10 +11,14 @@ layout (local_size_x = 16, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(r8ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; #define MICRO_TILE_DIM 8 @@ -32,17 +36,15 @@ void main() { uint row = (gl_LocalInvocationID.x % TEXELS_PER_ELEMENT) + TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x >> 3); - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0; + } + + uint tiles_per_pitch = max((info.pitch >> mip) / 8, 1); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - - #pragma unroll - for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { - imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(dst_tx & 0xff)); - dst_tx >>= 8; - } + uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints + uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints + out_data[dw_ofs_x + dw_ofs_y] = dst_tx; } \ No newline at end of file diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index 1cebc12b3..d27bc6e2d 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -10,10 +10,14 @@ layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; #define MICRO_TILE_DIM 8 @@ -44,18 +48,14 @@ void main() { uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint mip = 0u; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0; + } + uint tiles_per_pitch = max(((info.pitch >> mip) / 8u), 1u); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - - #pragma unroll - for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { - uint p0 = (p[ofs] >> 8) & 0xff; - uint p1 = p[ofs] & 0xff; - imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p1, p0, 0, 0)); - } + uint dw_ofs_x = target_tile_x * 8 + col; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * 8; + out_data[(dw_ofs_x + dw_ofs_y) / 2] = src_tx; } diff --git a/src/video_core/multi_level_page_table.h b/src/video_core/multi_level_page_table.h new file mode 100644 index 000000000..527476f3b --- /dev/null +++ b/src/video_core/multi_level_page_table.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/object_pool.h" +#include "common/types.h" + +namespace VideoCore { + +template +class MultiLevelPageTable final { + using Entry = typename Traits::Entry; + + static constexpr size_t AddressSpaceBits = Traits::AddressSpaceBits; + static constexpr size_t FirstLevelBits = Traits::FirstLevelBits; + static constexpr size_t PageBits = Traits::PageBits; + static constexpr size_t FirstLevelShift = AddressSpaceBits - FirstLevelBits; + static constexpr size_t SecondLevelBits = FirstLevelShift - PageBits; + static constexpr size_t NumEntriesPerL1Page = 1ULL << SecondLevelBits; + + using L1Page = std::array; + +public: + explicit MultiLevelPageTable() : first_level_map{1ULL << FirstLevelBits, nullptr} {} + + ~MultiLevelPageTable() noexcept = default; + + [[nodiscard]] Entry* find(size_t page) { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + return nullptr; + } + return &(*first_level_map[l1_page])[l2_page]; + } + + [[nodiscard]] const Entry& operator[](size_t page) const { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + first_level_map[l1_page] = page_alloc.Create(); + } + return (*first_level_map[l1_page])[l2_page]; + } + + [[nodiscard]] Entry& operator[](size_t page) { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + first_level_map[l1_page] = page_alloc.Create(); + } + return (*first_level_map[l1_page])[l2_page]; + } + +private: + std::vector first_level_map{}; + Common::ObjectPool page_alloc; +}; + +} // namespace VideoCore diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp new file mode 100644 index 000000000..6225f11ba --- /dev/null +++ b/src/video_core/page_manager.cpp @@ -0,0 +1,260 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "common/error.h" +#include "video_core/page_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" + +#ifndef _WIN64 +#include +#include +#include +#include +#include +#ifdef ENABLE_USERFAULTFD +#include +#endif +#else +#include +#endif + +namespace VideoCore { + +constexpr size_t PAGESIZE = 4_KB; +constexpr size_t PAGEBITS = 12; + +#ifdef _WIN64 +struct PageManager::Impl { + Impl(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; + + veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler); + ASSERT_MSG(veh_handle, "Failed to register an exception handler"); + } + + void OnMap(VAddr address, size_t size) {} + + void OnUnmap(VAddr address, size_t size) {} + + void Protect(VAddr address, size_t size, bool allow_write) { + DWORD prot = allow_write ? PAGE_READWRITE : PAGE_READONLY; + DWORD old_prot{}; + BOOL result = VirtualProtect(std::bit_cast(address), size, prot, &old_prot); + ASSERT_MSG(result != 0, "Region protection failed"); + } + + static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept { + const u32 ec = pExp->ExceptionRecord->ExceptionCode; + if (ec == EXCEPTION_ACCESS_VIOLATION) { + const auto info = pExp->ExceptionRecord->ExceptionInformation; + if (info[0] == 1) { // Write violation + rasterizer->InvalidateMemory(info[1], sizeof(u64)); + return EXCEPTION_CONTINUE_EXECUTION; + } /* else { + UNREACHABLE(); + }*/ + } + return EXCEPTION_CONTINUE_SEARCH; // pass further + } + + inline static Vulkan::Rasterizer* rasterizer; + void* veh_handle{}; +}; +#elif ENABLE_USERFAULTFD +struct PageManager::Impl { + Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg()); + + // Request uffdio features from kernel. + uffdio_api api; + api.api = UFFD_API; + api.features = UFFD_FEATURE_THREAD_ID; + const int ret = ioctl(uffd, UFFDIO_API, &api); + ASSERT(ret == 0 && api.api == UFFD_API); + + // Create uffd handler thread + ufd_thread = std::jthread([&](std::stop_token token) { UffdHandler(token); }); + } + + void OnMap(VAddr address, size_t size) { + uffdio_register reg; + reg.range.start = address; + reg.range.len = size; + reg.mode = UFFDIO_REGISTER_MODE_WP; + const int ret = ioctl(uffd, UFFDIO_REGISTER, ®); + ASSERT_MSG(ret != -1, "Uffdio register failed"); + } + + void OnUnmap(VAddr address, size_t size) { + uffdio_range range; + range.start = address; + range.len = size; + const int ret = ioctl(uffd, UFFDIO_UNREGISTER, &range); + ASSERT_MSG(ret != -1, "Uffdio unregister failed"); + } + + void Protect(VAddr address, size_t size, bool allow_write) { + uffdio_writeprotect wp; + wp.range.start = address; + wp.range.len = size; + wp.mode = allow_write ? 0 : UFFDIO_WRITEPROTECT_MODE_WP; + const int ret = ioctl(uffd, UFFDIO_WRITEPROTECT, &wp); + ASSERT_MSG(ret != -1, "Uffdio writeprotect failed with error: {}", + Common::GetLastErrorMsg()); + } + + void UffdHandler(std::stop_token token) { + while (!token.stop_requested()) { + pollfd pollfd; + pollfd.fd = uffd; + pollfd.events = POLLIN; + + // Block until the descriptor is ready for data reads. + const int pollres = poll(&pollfd, 1, -1); + switch (pollres) { + case -1: + perror("Poll userfaultfd"); + continue; + break; + case 0: + continue; + case 1: + break; + default: + UNREACHABLE_MSG("Unexpected number of descriptors {} out of poll", pollres); + } + + // We don't want an error condition to have occured. + ASSERT_MSG(!(pollfd.revents & POLLERR), "POLLERR on userfaultfd"); + + // We waited until there is data to read, we don't care about anything else. + if (!(pollfd.revents & POLLIN)) { + continue; + } + + // Read message from kernel. + uffd_msg msg; + const int readret = read(uffd, &msg, sizeof(msg)); + ASSERT_MSG(readret != -1 || errno == EAGAIN, "Unexpected result of uffd read"); + if (errno == EAGAIN) { + continue; + } + ASSERT_MSG(readret == sizeof(msg), "Unexpected short read, exiting"); + ASSERT(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP); + + // Notify rasterizer about the fault. + const VAddr addr = msg.arg.pagefault.address; + const VAddr addr_page = Common::AlignDown(addr, PAGESIZE); + rasterizer->InvalidateMemory(addr_page, PAGESIZE); + } + } + + Vulkan::Rasterizer* rasterizer; + std::jthread ufd_thread; + int uffd; +}; +#else +struct PageManager::Impl { + Impl(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; + +#ifdef __APPLE__ + // Read-only memory write results in SIGBUS on Apple. + static constexpr int SignalType = SIGBUS; +#else + static constexpr int SignalType = SIGSEGV; +#endif + sigset_t signal_mask; + sigemptyset(&signal_mask); + sigaddset(&signal_mask, SignalType); + + using HandlerType = decltype(sigaction::sa_sigaction); + + struct sigaction guest_access_fault {}; + guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK; + guest_access_fault.sa_sigaction = &GuestFaultSignalHandler; + guest_access_fault.sa_mask = signal_mask; + sigaction(SignalType, &guest_access_fault, nullptr); + } + + void OnMap(VAddr address, size_t size) {} + + void OnUnmap(VAddr address, size_t size) {} + + void Protect(VAddr address, size_t size, bool allow_write) { + mprotect(reinterpret_cast(address), size, + PROT_READ | (allow_write ? PROT_WRITE : 0)); + } + + static void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) { + ucontext_t* ctx = reinterpret_cast(raw_context); + const VAddr address = reinterpret_cast(info->si_addr); +#ifdef __APPLE__ + const u32 err = ctx->uc_mcontext->__es.__err; +#else + const greg_t err = ctx->uc_mcontext.gregs[REG_ERR]; +#endif + if (err & 0x2) { + rasterizer->InvalidateMemory(address, sizeof(u64)); + } else { + // Read not supported! + UNREACHABLE(); + } + } + + inline static Vulkan::Rasterizer* rasterizer; +}; +#endif + +PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) + : impl{std::make_unique(rasterizer_)}, rasterizer{rasterizer_} {} + +PageManager::~PageManager() = default; + +void PageManager::OnGpuMap(VAddr address, size_t size) { + impl->OnMap(address, size); +} + +void PageManager::OnGpuUnmap(VAddr address, size_t size) { + impl->OnUnmap(address, size); +} + +void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { + static constexpr u64 PageShift = 12; + + std::scoped_lock lk{mutex}; + const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; + const u64 page_start = addr >> PageShift; + const u64 page_end = page_start + num_pages; + + const auto pages_interval = + decltype(cached_pages)::interval_type::right_open(page_start, page_end); + if (delta > 0) { + cached_pages.add({pages_interval, delta}); + } + + const auto& range = cached_pages.equal_range(pages_interval); + for (const auto& [range, count] : boost::make_iterator_range(range)) { + const auto interval = range & pages_interval; + const VAddr interval_start_addr = boost::icl::first(interval) << PageShift; + const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; + const u32 interval_size = interval_end_addr - interval_start_addr; + if (delta > 0 && count == delta) { + impl->Protect(interval_start_addr, interval_size, false); + } else if (delta < 0 && count == -delta) { + impl->Protect(interval_start_addr, interval_size, true); + } else { + ASSERT(count >= 0); + } + } + + if (delta < 0) { + cached_pages.add({pages_interval, delta}); + } +} + +} // namespace VideoCore diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h new file mode 100644 index 000000000..0dc022aa5 --- /dev/null +++ b/src/video_core/page_manager.h @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/types.h" + +namespace Vulkan { +class Rasterizer; +} + +namespace VideoCore { + +class PageManager { +public: + explicit PageManager(Vulkan::Rasterizer* rasterizer); + ~PageManager(); + + /// Register a range of mapped gpu memory. + void OnGpuMap(VAddr address, size_t size); + + /// Unregister a range of gpu memory that was unmapped. + void OnGpuUnmap(VAddr address, size_t size); + + /// Increase/decrease the number of surface in pages touching the specified region + void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta); + +private: + struct Impl; + std::unique_ptr impl; + Vulkan::Rasterizer* rasterizer; + std::mutex mutex; + boost::icl::interval_map cached_pages; +}; + +} // namespace VideoCore diff --git a/src/video_core/renderdoc.cpp b/src/video_core/renderdoc.cpp new file mode 100644 index 000000000..7f88e1264 --- /dev/null +++ b/src/video_core/renderdoc.cpp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "common/config.h" +#include "video_core/renderdoc.h" + +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include + +namespace VideoCore { + +enum class CaptureState { + Idle, + Triggered, + InProgress, +}; +static CaptureState capture_state{CaptureState::Idle}; + +RENDERDOC_API_1_6_0* rdoc_api{}; + +void LoadRenderDoc() { +#ifdef WIN32 + + // Check if we are running by RDoc GUI + HMODULE mod = GetModuleHandleA("renderdoc.dll"); + if (!mod && Config::isRdocEnabled()) { + // If enabled in config, try to load RDoc runtime in offline mode + HKEY h_reg_key; + LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, + L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0, + KEY_READ, &h_reg_key); + if (result != ERROR_SUCCESS) { + return; + } + std::array key_str{}; + DWORD str_sz_out{key_str.size()}; + result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)key_str.data(), &str_sz_out); + if (result != ERROR_SUCCESS) { + return; + } + + std::filesystem::path path{key_str.cbegin(), key_str.cend()}; + path = path.parent_path().append("renderdoc.dll"); + const auto path_to_lib = path.generic_string(); + mod = LoadLibraryA(path_to_lib.c_str()); + } + + if (mod) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } +#else +#ifdef ANDROID + static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so"; +#else + static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; +#endif + if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } +#endif + if (rdoc_api) { + // Disable default capture keys as they suppose to trigger present-to-present capturing + // and it is not what we want + rdoc_api->SetCaptureKeys(nullptr, 0); + + // Also remove rdoc crash handler + rdoc_api->UnloadCrashHandler(); + } +} + +void StartCapture() { + if (!rdoc_api) { + return; + } + + if (capture_state == CaptureState::Triggered) { + rdoc_api->StartFrameCapture(nullptr, nullptr); + capture_state = CaptureState::InProgress; + } +} + +void EndCapture() { + if (!rdoc_api) { + return; + } + + if (capture_state == CaptureState::InProgress) { + rdoc_api->EndFrameCapture(nullptr, nullptr); + capture_state = CaptureState::Idle; + } +} + +void TriggerCapture() { + if (capture_state == CaptureState::Idle) { + capture_state = CaptureState::Triggered; + } +} + +void SetOutputDir(const std::string& path, const std::string& prefix) { + if (!rdoc_api) { + return; + } + rdoc_api->SetCaptureFilePathTemplate((path + '\\' + prefix).c_str()); +} + +} // namespace VideoCore diff --git a/src/video_core/renderdoc.h b/src/video_core/renderdoc.h new file mode 100644 index 000000000..febf6fbc1 --- /dev/null +++ b/src/video_core/renderdoc.h @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace VideoCore { + +/// Loads renderdoc dynamic library module. +void LoadRenderDoc(); + +/// Begins a capture if a renderdoc instance is attached. +void StartCapture(); + +/// Ends current renderdoc capture. +void EndCapture(); + +/// Triggers capturing process. +void TriggerCapture(); + +/// Sets output directory for captures +void SetOutputDir(const std::string& path, const std::string& prefix); + +} // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 90d974040..4fc32ab28 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -81,6 +81,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::PatchPrimitive: + return vk::PrimitiveTopology::ePatchList; case Liverpool::PrimitiveType::QuadList: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; @@ -297,6 +299,7 @@ std::span GetAllFormats() { vk::Format::eBc3UnormBlock, vk::Format::eBc4UnormBlock, vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, vk::Format::eBc7SrgbBlock, vk::Format::eBc7UnormBlock, vk::Format::eD16Unorm, @@ -308,6 +311,7 @@ std::span GetAllFormats() { vk::Format::eR8G8B8A8Srgb, vk::Format::eR8G8B8A8Uint, vk::Format::eR8G8B8A8Unorm, + vk::Format::eR8G8B8A8Snorm, vk::Format::eR8G8B8A8Uscaled, vk::Format::eR8G8Snorm, vk::Format::eR8G8Uint, @@ -315,6 +319,7 @@ std::span GetAllFormats() { vk::Format::eR8Sint, vk::Format::eR8Uint, vk::Format::eR8Unorm, + vk::Format::eR8Srgb, vk::Format::eR16G16B16A16Sfloat, vk::Format::eR16G16B16A16Sint, vk::Format::eR16G16B16A16Snorm, @@ -335,6 +340,12 @@ std::span GetAllFormats() { vk::Format::eR32Sfloat, vk::Format::eR32Sint, vk::Format::eR32Uint, + vk::Format::eBc6HUfloatBlock, + vk::Format::eBc6HSfloatBlock, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16B16A16Sscaled, + vk::Format::eR16G16Sscaled, + vk::Format::eE5B9G9R9UfloatPack32, }; return formats; } @@ -384,17 +395,24 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc5UnormBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eBc5SnormBlock; + } if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16B16A16Sint; } + if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && + num_format == AmdGpu::NumberFormat::Sscaled) { + return vk::Format::eR16G16B16A16Sscaled; + } if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR16G16Sfloat; } - if (data_format == AmdGpu::DataFormat::Format10_11_11 && - num_format == AmdGpu::NumberFormat::Float) { - return vk::Format::eB10G11R11UfloatPack32; + if (data_format == AmdGpu::DataFormat::Format16_16 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR16G16Unorm; } if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && num_format == AmdGpu::NumberFormat::Unorm) { @@ -492,6 +510,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16Sint; } + if (data_format == AmdGpu::DataFormat::Format16_16 && + num_format == AmdGpu::NumberFormat::Sscaled) { + return vk::Format::eR16G16Sscaled; + } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uscaled) { return vk::Format::eR8G8B8A8Uscaled; @@ -514,6 +536,37 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::SnormNz) { return vk::Format::eR16G16B16A16Snorm; } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR8G8B8A8Snorm; + } + if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc6HUfloatBlock; + } + if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eBc6HSfloatBlock; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Sint) { + return vk::Format::eR8G8B8A8Sint; + } + if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Srgb) { + return vk::Format::eR8Srgb; + } + if (data_format == AmdGpu::DataFormat::Format11_11_10 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eB10G11R11UfloatPack32; + } + if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR16Uint; + } + if (data_format == AmdGpu::DataFormat::Format5_9_9_9 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eE5B9G9R9UfloatPack32; + } + if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR8Snorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } @@ -645,6 +698,8 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples) { return vk::SampleCountFlagBits::e4; case 8: return vk::SampleCountFlagBits::e8; + case 16: + return vk::SampleCountFlagBits::e16; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 098f14d9b..c78d629e4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -63,44 +63,30 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for }; } -RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool) - : window{window_}, instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, - scheduler{instance}, swapchain{instance, window}, texture_cache{instance, scheduler} { - rasterizer = std::make_unique(instance, scheduler, texture_cache, liverpool); +RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_) + : window{window_}, liverpool{liverpool_}, + instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance}, + present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window}, + rasterizer{std::make_unique(instance, draw_scheduler, liverpool)}, + texture_cache{rasterizer->GetTextureCache()} { const u32 num_images = swapchain.GetImageCount(); const vk::Device device = instance.GetDevice(); - const vk::CommandPoolCreateInfo pool_info = { - .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | - vk::CommandPoolCreateFlagBits::eTransient, - .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), - }; - command_pool = device.createCommandPoolUnique(pool_info); - - const vk::CommandBufferAllocateInfo alloc_info = { - .commandPool = *command_pool, - .level = vk::CommandBufferLevel::ePrimary, - .commandBufferCount = num_images, - }; - - const auto cmdbuffers = device.allocateCommandBuffers(alloc_info); + // Create presentation frames. present_frames.resize(num_images); for (u32 i = 0; i < num_images; i++) { Frame& frame = present_frames[i]; - frame.cmdbuf = cmdbuffers[i]; - frame.render_ready = device.createSemaphore({}); frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}); free_queue.push(&frame); } } RendererVulkan::~RendererVulkan() { - scheduler.Finish(); + draw_scheduler.Finish(); const vk::Device device = instance.GetDevice(); for (auto& frame : present_frames) { vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); device.destroyImageView(frame.image_view); - device.destroySemaphore(frame.render_ready); device.destroyFence(frame.present_done); } } @@ -184,7 +170,7 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { info.pitch = splash->GetImageInfo().width; info.guest_address = VAddr(splash->GetImageData().data()); info.guest_size_bytes = splash->GetImageData().size(); - splash_img.emplace(instance, scheduler, info); + splash_img.emplace(instance, present_scheduler, info); texture_cache.RefreshImage(*splash_img); } frame = PrepareFrameInternal(*splash_img); @@ -193,12 +179,18 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { return true; } -Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { +Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); - // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + // EOP flips are triggered from GPU thread so use the drawing scheduler to record + // commands. Otherwise we are dealing with a CPU flip which could have arrived + // from any guest thread. Use a separate scheduler for that. + auto& scheduler = is_eop ? draw_scheduler : flip_scheduler; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf); const std::array pre_barrier{ vk::ImageMemoryBarrier{ @@ -218,12 +210,11 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { }, }, }; - - const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. cmdbuf.blitImage( image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height), @@ -245,13 +236,15 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); - // Flush pending vulkan operations. - scheduler.Flush(frame->render_ready); + // Flush frame creation commands. + frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle(); + frame->ready_tick = scheduler.CurrentTick(); + SubmitInfo info{}; + scheduler.Flush(info); return frame; } @@ -260,11 +253,8 @@ void RendererVulkan::Present(Frame* frame) { const vk::Image swapchain_image = swapchain.Image(); - const vk::CommandBufferBeginInfo begin_info = { - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, - }; - const vk::CommandBuffer cmdbuf = frame->cmdbuf; - cmdbuf.begin(begin_info); + auto& scheduler = present_scheduler; + const auto cmdbuf = scheduler.CommandBuffer(); { auto* profiler_ctx = instance.GetProfilerContext(); TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", @@ -339,35 +329,17 @@ void RendererVulkan::Present(Frame* frame) { TracyVkCollect(profiler_ctx, cmdbuf); } } - cmdbuf.end(); - static constexpr std::array wait_stage_masks = { - vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eAllGraphics, - }; - - const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); - const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); - const std::array wait_semaphores = {image_acquired, frame->render_ready}; - - vk::SubmitInfo submit_info = { - .waitSemaphoreCount = static_cast(wait_semaphores.size()), - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1u, - .pCommandBuffers = &cmdbuf, - .signalSemaphoreCount = 1, - .pSignalSemaphores = &present_ready, - }; - - std::scoped_lock submit_lock{scheduler.submit_mutex}; - try { - instance.GetGraphicsQueue().submit(submit_info, frame->present_done); - } catch (vk::DeviceLostError& err) { - LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what()); - UNREACHABLE(); - } + // Flush vulkan commands. + SubmitInfo info{}; + info.AddWait(swapchain.GetImageAcquiredSemaphore()); + info.AddWait(frame->ready_semaphore, frame->ready_tick); + info.AddSignal(swapchain.GetPresentReadySemaphore()); + info.AddSignal(frame->present_done); + scheduler.Flush(info); + // Present to swapchain. + std::scoped_lock submit_lock{Scheduler::submit_mutex}; swapchain.Present(); // Free the frame for reuse diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 701d3d14b..eab9d527c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -26,9 +26,15 @@ struct Frame { VmaAllocation allocation; vk::Image image; vk::ImageView image_view; - vk::Semaphore render_ready; vk::Fence present_done; - vk::CommandBuffer cmdbuf; + vk::Semaphore ready_semaphore; + u64 ready_tick; +}; + +enum SchedulerType { + Draw, + Present, + CpuFlip, }; class Rasterizer; @@ -39,47 +45,56 @@ public: ~RendererVulkan(); Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, - VAddr cpu_address) { + VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, cpu_address); + const auto image_id = texture_cache.FindImage(info); + texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); auto& image = texture_cache.GetImage(image_id); - return PrepareFrameInternal(image); + return PrepareFrameInternal(image, is_eop); } - Frame* PrepareBlankFrame() { + Frame* PrepareBlankFrame(bool is_eop) { auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image); + return PrepareFrameInternal(image, is_eop); } VideoCore::Image& RegisterVideoOutSurface( const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, cpu_address); + const auto image_id = texture_cache.FindImage(info); return texture_cache.GetImage(image_id); } bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) { + return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) { return vo_buffer == color_buffer.Address(); - }) != vo_buffers_addr.cend(); + }) != vo_buffers_addr.end(); } bool ShowSplash(Frame* frame = nullptr); void Present(Frame* frame); void RecreateFrame(Frame* frame, u32 width, u32 height); + void FlushDraw() { + SubmitInfo info{}; + draw_scheduler.Flush(info); + } + private: - Frame* PrepareFrameInternal(VideoCore::Image& image); + Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* GetRenderFrame(); private: Frontend::WindowSDL& window; + AmdGpu::Liverpool* liverpool; Instance instance; - Scheduler scheduler; + Scheduler draw_scheduler; + Scheduler present_scheduler; + Scheduler flip_scheduler; Swapchain swapchain; std::unique_ptr rasterizer; - VideoCore::TextureCache texture_cache; + VideoCore::TextureCache& texture_cache; vk::UniqueCommandPool command_pool; std::vector present_frames; std::queue free_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 954adf448..62b50eeb1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -3,11 +3,10 @@ #include #include "common/alignment.h" -#include "core/memory.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/texture_cache/texture_cache.h" namespace Vulkan { @@ -51,6 +50,12 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler }); } + const vk::PushConstantRange push_constants = { + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Shader::PushData), + }; + const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, .bindingCount = static_cast(bindings.size()), @@ -62,8 +67,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, .pSetLayouts = &set_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, + .pushConstantRangeCount = 1U, + .pPushConstantRanges = &push_constants, }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); @@ -82,33 +87,18 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipeline::~ComputePipeline() = default; -bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, +bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. boost::container::static_vector buffer_infos; boost::container::static_vector image_infos; boost::container::small_vector set_writes; + Shader::PushData push_data{}; u32 binding{}; for (const auto& buffer : info.buffers) { const auto vsharp = buffer.GetVsharp(info); - const u32 size = vsharp.GetSize(); const VAddr address = vsharp.base_address; - texture_cache.OnCpuWrite(address); - const u32 offset = staging.Copy(address, size, - buffer.is_storage ? instance.StorageMinAlignment() - : instance.UniformMinAlignment()); - buffer_infos.emplace_back(staging.Handle(), offset, size); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); - // Most of the time when a metadata is updated with a shader it gets cleared. It means we // can skip the whole dispatch and update the tracked state instead. Also, it is not // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will @@ -123,12 +113,38 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); } } + const u32 size = vsharp.GetSize(); + if (buffer.is_written) { + texture_cache.InvalidateMemory(address, size, true); + } + const u32 alignment = + buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(address, size, buffer.is_written); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + if (adjust != 0) { + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding, adjust); + } + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, + .pBufferInfo = &buffer_infos.back(), + }); } for (const auto& image_desc : info.images) { const auto tsharp = info.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); - const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage); + VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ @@ -146,7 +162,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s } } for (const auto& sampler : info.samplers) { - const auto ssharp = info.ReadUd(sampler.sgpr_base, sampler.dword_offset); + const auto ssharp = sampler.GetSsharp(info); const auto vk_sampler = texture_cache.GetSampler(ssharp); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ @@ -164,6 +180,8 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s } const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), + &push_data); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes); return true; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 4cdcccfcb..16de5635f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,19 +6,15 @@ #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/vk_common.h" -namespace Core { -class MemoryManager; -} - namespace VideoCore { +class BufferCache; class TextureCache; -} +} // namespace VideoCore namespace Vulkan { class Instance; class Scheduler; -class StreamBuffer; class ComputePipeline { public: @@ -31,7 +27,7 @@ public: return *pipeline; } - bool BindResources(Core::MemoryManager* memory, StreamBuffer& staging, + bool BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; private: diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f119bc770..0c516dbac 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -5,13 +5,13 @@ #include #include +#include "common/alignment.h" #include "common/assert.h" -#include "core/memory.h" #include "video_core/amdgpu/resource.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/texture_cache/texture_cache.h" namespace Vulkan { @@ -32,9 +32,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul BuildDescSetLayout(); const vk::PushConstantRange push_constants = { - .stageFlags = vk::ShaderStageFlagBits::eVertex, + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, .offset = 0, - .size = 2 * sizeof(u32), + .size = sizeof(Shader::PushData), }; const vk::DescriptorSetLayout set_layout = *desc_layout; @@ -86,8 +86,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), - .primitiveRestartEnable = false, + .primitiveRestartEnable = key.prim_restart_index != 0, }; + ASSERT_MSG(key.prim_restart_index == 0 || key.prim_restart_index == 0xFFFF, + "Primitive restart index other than 0xFFFF is not supported"); const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, @@ -145,6 +147,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT); dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); } + if (instance.IsVertexInputDynamicState()) { + dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); + } const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast(dynamic_states.size()), @@ -328,25 +333,43 @@ void GraphicsPipeline::BuildDescSetLayout() { desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); } -void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, +void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, + VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const { - BindVertexBuffers(staging); - // Bind resource buffers and textures. boost::container::static_vector buffer_infos; boost::container::static_vector image_infos; boost::container::small_vector set_writes; + Shader::PushData push_data{}; u32 binding{}; for (const auto& stage : stages) { + if (stage.uses_step_rates) { + push_data.step0 = regs.vgt_instance_step_rate_0; + push_data.step1 = regs.vgt_instance_step_rate_1; + } for (const auto& buffer : stage.buffers) { const auto vsharp = buffer.GetVsharp(stage); - const VAddr address = vsharp.base_address; - const u32 size = vsharp.GetSize(); - const u32 offset = staging.Copy(address, size, - buffer.is_storage ? instance.StorageMinAlignment() - : instance.UniformMinAlignment()); - buffer_infos.emplace_back(staging.Handle(), offset, size); + if (vsharp) { + const VAddr address = vsharp.base_address; + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); + } + const u32 size = vsharp.GetSize(); + const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment() + : instance.UniformMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(address, size, buffer.is_written); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + if (adjust != 0) { + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding, adjust); + } + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); + } else { + buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); + } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, @@ -356,17 +379,15 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); - - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); - } } boost::container::static_vector tsharps; for (const auto& image_desc : stage.images) { const auto& tsharp = tsharps.emplace_back( stage.ReadUd(image_desc.sgpr_base, image_desc.dword_offset)); - const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage); + VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ @@ -384,7 +405,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } } for (const auto& sampler : stage.samplers) { - auto ssharp = stage.ReadUd(sampler.sgpr_base, sampler.dword_offset); + auto ssharp = sampler.GetSsharp(stage); if (sampler.disable_aniso) { const auto& tsharp = tsharps[sampler.associated_image]; if (tsharp.base_level == 0 && tsharp.last_level == 0) { @@ -404,86 +425,15 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } } + const auto cmdbuf = scheduler.CommandBuffer(); if (!set_writes.empty()) { - const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes); } -} - -void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { - const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; - if (vs_info.vs_inputs.empty()) { - return; - } - - std::array host_buffers; - std::array host_offsets; - boost::container::static_vector guest_buffers; - - struct BufferRange { - VAddr base_address; - VAddr end_address; - u64 offset; // offset in the mapped memory - - size_t GetSize() const { - return end_address - base_address; - } - }; - - // Calculate buffers memory overlaps - boost::container::static_vector ranges{}; - for (const auto& input : vs_info.vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { - continue; - } - - const auto& buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); - if (buffer.GetSize() == 0) { - continue; - } - guest_buffers.emplace_back(buffer); - ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); - } - std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { - return lhv.base_address < rhv.base_address; - }); - - boost::container::static_vector ranges_merged{ranges[0]}; - for (auto range : ranges) { - auto& prev_range = ranges_merged.back(); - if (prev_range.end_address < range.base_address) { - ranges_merged.emplace_back(range); - } else { - prev_range.end_address = std::max(prev_range.end_address, range.end_address); - } - } - - // Map buffers - for (auto& range : ranges_merged) { - range.offset = staging.Copy(range.base_address, range.GetSize(), 4); - } - - // Bind vertex buffers - const size_t num_buffers = guest_buffers.size(); - for (u32 i = 0; i < num_buffers; ++i) { - const auto& buffer = guest_buffers[i]; - const auto& host_buffer = std::ranges::find_if( - ranges_merged.cbegin(), ranges_merged.cend(), [&](const BufferRange& range) { - return (buffer.base_address >= range.base_address && - buffer.base_address < range.end_address); - }); - assert(host_buffer != ranges_merged.cend()); - - host_buffers[i] = staging.Handle(); - host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; - } - - if (num_buffers > 0) { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); - } + cmdbuf.pushConstants(*pipeline_layout, + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U, + sizeof(push_data), &push_data); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle()); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e1564f8fd..fc5070913 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -7,13 +7,10 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" -namespace Core { -class MemoryManager; -} - namespace VideoCore { +class BufferCache; class TextureCache; -} +} // namespace VideoCore namespace Vulkan { @@ -22,7 +19,6 @@ static constexpr u32 MaxShaderStages = 5; class Instance; class Scheduler; -class StreamBuffer; using Liverpool = AmdGpu::Liverpool; @@ -43,6 +39,7 @@ struct GraphicsPipelineKey { Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_back; Liverpool::PrimitiveType prim_type; + u32 prim_restart_index; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; Liverpool::FrontFace front_face; @@ -64,7 +61,7 @@ public: std::array modules); ~GraphicsPipeline(); - void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, + void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; vk::Pipeline Handle() const noexcept { @@ -75,6 +72,10 @@ public: return *pipeline_layout; } + const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + return stages[u32(stage)]; + } + bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; @@ -90,7 +91,6 @@ public: private: void BuildDescSetLayout(); - void BindVertexBuffers(StreamBuffer& staging) const; private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 09a9180e1..66da030f1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -8,6 +8,7 @@ #include #include "common/assert.h" +#include "common/config.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -163,11 +164,13 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceVulkan13Features, vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR, - vk::PhysicalDeviceDepthClipControlFeaturesEXT>(); - const vk::StructureChain properties_chain = - physical_device.getProperties2(); + vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT, + vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); + const vk::StructureChain properties_chain = physical_device.getProperties2< + vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, + vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>(); + subgroup_size = properties_chain.get().subgroupSize; + LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size); features = feature_chain.get().features; if (available_extensions.empty()) { @@ -196,21 +199,36 @@ bool Instance::CreateDevice() { external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); - add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + const bool depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); - add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + + if (has_sync2) { + has_nv_checkpoints = Config::isMarkersEnabled() + ? add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME) + : false; + } + +#ifdef __APPLE__ + // Required by Vulkan spec if supported. + add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); +#endif const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -261,6 +279,7 @@ bool Instance::CreateDevice() { .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats, .shaderStorageImageMultisample = features.shaderStorageImageMultisample, .shaderClipDistance = features.shaderClipDistance, + .shaderInt64 = features.shaderInt64, .shaderInt16 = features.shaderInt16, }, }, @@ -301,18 +320,55 @@ bool Instance::CreateDevice() { .workgroupMemoryExplicitLayoutScalarBlockLayout = true, .workgroupMemoryExplicitLayout8BitAccess = true, .workgroupMemoryExplicitLayout16BitAccess = true, - }}; + }, + vk::PhysicalDeviceRobustness2FeaturesEXT{ + .nullDescriptor = true, + }, + vk::PhysicalDeviceSynchronization2Features{ + .synchronization2 = true, + }, + vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ + .vertexInputDynamicState = true, + }, +#ifdef __APPLE__ + feature_chain.get(), +#endif + }; + if (!maintenance4) { + device_chain.unlink(); + } + if (!custom_border_color) { + device_chain.unlink(); + } if (!color_write_en) { device_chain.unlink(); device_chain.unlink(); } + if (!depth_clip_control) { + device_chain.unlink(); + } + if (!workgroup_memory_explicit_layout) { + device_chain.unlink(); + } + if (robustness) { + device_chain.get().nullDescriptor = + feature_chain.get().nullDescriptor; + } else { + device_chain.unlink(); + } + if (!vertex_input_dynamic_state) { + device_chain.unlink(); + } try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what()); return false; + } catch (vk::FeatureNotPresentError& err) { + LOG_CRITICAL(Render_Vulkan, "Some required features are not available {}", err.what()); + return false; } VULKAN_HPP_DEFAULT_DISPATCHER.init(*device); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 32965ddb1..4cb4741a5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -88,6 +88,10 @@ public: return profiler_context; } + bool HasNvCheckpoints() const { + return has_nv_checkpoints; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -128,6 +132,11 @@ public: return color_write_en; } + /// Returns true when VK_EXT_vertex_input_dynamic_state is supported. + bool IsVertexInputDynamicState() const { + return vertex_input_dynamic_state; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -188,6 +197,11 @@ public: return properties.limits.nonCoherentAtomSize; } + /// Returns the subgroup size of the selected physical device. + u32 SubgroupSize() const { + return subgroup_size; + } + /// Returns the maximum supported elements in a texel buffer u32 MaxTexelBufferElements() const { return properties.limits.maxTexelBufferElements; @@ -248,11 +262,14 @@ private: bool external_memory_host{}; bool workgroup_memory_explicit_layout{}; bool color_write_en{}; + bool vertex_input_dynamic_state{}; u64 min_imported_host_pointer_alignment{}; + u32 subgroup_size{}; bool tooling_info{}; bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool has_nv_checkpoints{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 037510d41..753f2bbdf 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include "common/assert.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" @@ -60,46 +58,4 @@ void MasterSemaphore::Wait(u64 tick) { Refresh(); } -void MasterSemaphore::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, - u64 signal_value) { - cmdbuf.end(); - - const u32 num_signal_semaphores = signal ? 2U : 1U; - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{Handle(), signal}; - - const u32 num_wait_semaphores = wait ? 2U : 1U; - const std::array wait_values{signal_value - 1, u64(1)}; - const std::array wait_semaphores{Handle(), wait}; - - static constexpr std::array wait_stage_masks = { - vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eColorAttachmentOutput, - }; - - const vk::TimelineSemaphoreSubmitInfo timeline_si = { - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - - const vk::SubmitInfo submit_info = { - .pNext = &timeline_si, - .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1u, - .pCommandBuffers = &cmdbuf, - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - - try { - instance.GetGraphicsQueue().submit(submit_info); - } catch (vk::DeviceLostError& err) { - UNREACHABLE_MSG("Device lost during submit: {}", err.what()); - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 963676b1a..ebc7a60a1 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -46,10 +46,6 @@ public: /// Waits for a tick to be hit on the GPU void Wait(u64 tick); - /// Submits the provided command buffer for execution - void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, - u64 signal_value); - protected: const Instance& instance; vk::UniqueSemaphore semaphore; ///< Timeline semaphore. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7f0b74ab6..0a94ce6d8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,11 +109,16 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); profile = Shader::Profile{ .supported_spirv = 0x00010600U, + .subgroup_size = instance.SubgroupSize(), .support_explicit_workgroup_layout = true, }; } const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { + // Tessellation is unsupported so skip the draw to avoid locking up the driver. + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) { + return nullptr; + } RefreshGraphicsKey(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { @@ -160,6 +165,7 @@ void PipelineCache::RefreshGraphicsKey() { key.stencil_ref_front = regs.stencil_ref_front; key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; + key.prim_restart_index = regs.primitive_reset_index; key.polygon_mode = regs.polygon_control.PolyMode(); key.cull_mode = regs.polygon_control.CullingMode(); key.clip_space = regs.clipper_control.clip_space; @@ -191,7 +197,7 @@ void PipelineCache::RefreshGraphicsKey() { LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), is_vo_surface); + base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); @@ -202,12 +208,20 @@ void PipelineCache::RefreshGraphicsKey() { } for (u32 i = 0; i < MaxShaderStages; i++) { + if (!regs.stage_enable.IsStageEnabled(i)) { + key.stage_hashes[i] = 0; + continue; + } auto* pgm = regs.ProgramForStage(i); if (!pgm || !pgm->Address()) { key.stage_hashes[i] = 0; continue; } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); + if (!bininfo->Valid()) { + key.stage_hashes[i] = 0; + continue; + } key.stage_hashes[i] = bininfo->shader_hash; } } @@ -268,7 +282,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); info.pgm_base = pgm->Address(); info.pgm_hash = hash; - programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); + programs[i] = + Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile); // Compile IR to SPIR-V auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding); @@ -308,7 +323,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { Shader::Info info = MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); info.pgm_base = cs_pgm.Address(); - auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); + auto program = + Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile); // Compile IR to SPIR-V u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index a77b298b1..d41723ec8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -5,7 +5,6 @@ #include #include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -51,8 +50,8 @@ private: Shader::Profile profile{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 1499d8771..c73a8139d 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -32,6 +32,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( switch (static_cast(callback_data->messageIdNumber)) { case 0x609a13b: // Vertex attribute at location not consumed by shader case 0xc81ad50e: + case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; default: break; @@ -156,6 +157,10 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window break; } +#ifdef __APPLE__ + extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif + if (window_type != Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } @@ -220,12 +225,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT vk::Bool32 enable_sync = enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::LayerSettingEXT layer_set = { - .pLayerName = VALIDATION_LAYER_NAME, - .pSettingName = "validate_sync", - .type = vk::LayerSettingTypeEXT::eBool32, - .valueCount = 1, - .pValues = &enable_sync, + vk::Bool32 enable_gpuav = + enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() + ? "GPU_BASED_GPU_ASSISTED" + : "GPU_BASED_NONE"; + const std::array layer_setings = { + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_sync", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "sync_queue_submit", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_gpu_based", + .type = vk::LayerSettingTypeEXT::eString, + .valueCount = 1, + .pValues = &gpuav_mode, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_reserve_binding_slot", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_descriptor_checks", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_validate_indirect_buffer", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_buffer_copies", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, }; vk::StructureChain instance_ci_chain = { @@ -235,10 +289,13 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), +#ifdef __APPLE__ + .flags = vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR, +#endif }, vk::LayerSettingsCreateInfoEXT{ - .settingCount = 1, - .pSettings = &layer_set, + .settingCount = layer_setings.size(), + .pSettings = layer_setings.data(), }, }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fe52d0741..542624a0e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -13,22 +13,17 @@ namespace Vulkan { -static constexpr vk::BufferUsageFlags VertexIndexFlags = - vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformBuffer | - vk::BufferUsageFlagBits::eStorageBuffer; - Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, - VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, - liverpool{liverpool_}, memory{Core::Memory::Instance()}, - pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} { + AmdGpu::Liverpool* liverpool_) + : instance{instance_}, scheduler{scheduler_}, page_manager{this}, + buffer_cache{instance, scheduler, liverpool_, page_manager}, + texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_}, + memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } - - memory->SetInstance(&instance); + memory->SetRasterizer(this); + wfi_event = instance.GetDevice().createEventUnique({}); } Rasterizer::~Rasterizer() = default; @@ -38,29 +33,24 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto cmdbuf = scheduler.CommandBuffer(); const auto& regs = liverpool->regs; - const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset); const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return; } try { - pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + pipeline->BindResources(regs, buffer_cache, texture_cache); } catch (...) { UNREACHABLE(); } + const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + buffer_cache.BindVertexBuffers(vs_info); + const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); + BeginRendering(); UpdateDynamicState(*pipeline); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); - - const u32 step_rates[] = { - regs.vgt_instance_step_rate_0, - regs.vgt_instance_step_rate_1, - }; - cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u, - sizeof(step_rates), &step_rates); if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { @@ -82,8 +72,7 @@ void Rasterizer::DispatchDirect() { } try { - const auto has_resources = - pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache); if (!has_resources) { return; } @@ -96,6 +85,13 @@ void Rasterizer::DispatchDirect() { cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); } +u64 Rasterizer::Flush() { + const u64 current_tick = scheduler.CurrentTick(); + SubmitInfo info{}; + scheduler.Flush(info); + return current_tick; +} + void Rasterizer::BeginRendering() { const auto& regs = liverpool->regs; RenderState state; @@ -113,15 +109,18 @@ void Rasterizer::BeginRendering() { } const auto& hint = liverpool->last_cb_extent[col_buf_id]; - const auto& image_view = texture_cache.FindRenderTarget(col_buf, hint); + VideoCore::ImageInfo image_info{col_buf, hint}; + VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; + const auto& image_view = texture_cache.FindRenderTarget(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); + state.color_images[state.num_color_attachments] = image.image; state.color_attachments[state.num_color_attachments++] = { .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eGeneral, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, .clearValue = @@ -136,12 +135,14 @@ void Rasterizer::BeginRendering() { const bool is_clear = regs.depth_render_control.depth_clear_enable || texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; - const auto& image_view = texture_cache.FindDepthTarget( - regs.depth_buffer, regs.depth_view.NumSlices(), htile_address, hint, - regs.depth_control.depth_write_enable); + VideoCore::ImageInfo image_info{regs.depth_buffer, regs.depth_view.NumSlices(), + htile_address, hint}; + VideoCore::ImageViewInfo view_info{regs.depth_buffer, regs.depth_view, regs.depth_control}; + const auto& image_view = texture_cache.FindDepthTarget(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); + state.depth_image = image.image; state.depth_attachment = { .imageView = *image_view.image_view, .imageLayout = image.layout, @@ -151,50 +152,25 @@ void Rasterizer::BeginRendering() { .stencil = regs.stencil_clear}}, }; texture_cache.TouchMeta(htile_address, false); - state.num_depth_attachments++; + state.has_depth = true; + state.has_stencil = image.info.usage.stencil; } scheduler.BeginRendering(state); } -u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList primitive type with CPU made index buffer. - const auto& regs = liverpool->regs; - if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) { - // ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw"); - is_indexed = true; +void Rasterizer::InvalidateMemory(VAddr addr, u64 size) { + buffer_cache.InvalidateMemory(addr, size); + texture_cache.InvalidateMemory(addr, size); +} - // Emit indices. - const u32 index_size = 3 * regs.num_indices; - const auto [data, offset, _] = vertex_index_buffer.Map(index_size); - LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); - vertex_index_buffer.Commit(index_size); +void Rasterizer::MapMemory(VAddr addr, u64 size) { + page_manager.OnGpuMap(addr, size); +} - // Bind index buffer. - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16); - return index_size / sizeof(u16); - } - if (!is_indexed) { - return regs.num_indices; - } - - // Figure out index type and size. - const bool is_index16 = regs.index_buffer_type.index_type == Liverpool::IndexType::Index16; - const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; - const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); - - // Upload index data to stream buffer. - const auto index_address = regs.index_base_address.Address(); - const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size; - const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size); - std::memcpy(data, index_address, index_buffer_size); - vertex_index_buffer.Commit(index_buffer_size); - - // Bind index buffer. - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset + index_offset * index_size, - index_type); - return regs.num_indices; +void Rasterizer::UnmapMemory(VAddr addr, u64 size) { + buffer_cache.InvalidateMemory(addr, size); + texture_cache.UnmapMemory(addr, size); + page_manager.OnGpuUnmap(addr, size); } void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { @@ -255,16 +231,42 @@ void Rasterizer::UpdateDepthStencilState() { cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); } -void Rasterizer::ScopeMarkerBegin(const std::string& str) { +void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = str.c_str(), + .pLabelName = str.data(), }); } void Rasterizer::ScopeMarkerEnd() { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.endDebugUtilsLabelEXT(); } +void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = str.data(), + }); +} + +void Rasterizer::Breadcrumb(u64 id) { + if (!instance.HasNvCheckpoints()) { + return; + } + scheduler.CommandBuffer().setCheckpointNV(id); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index aead5955d..a151ebc27 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,8 +3,10 @@ #pragma once +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/texture_cache/texture_cache.h" namespace AmdGpu { struct Liverpool; @@ -14,10 +16,6 @@ namespace Core { class MemoryManager; } -namespace VideoCore { -class TextureCache; -} - namespace Vulkan { class Scheduler; @@ -26,20 +24,29 @@ class GraphicsPipeline; class Rasterizer { public: explicit Rasterizer(const Instance& instance, Scheduler& scheduler, - VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); + AmdGpu::Liverpool* liverpool); ~Rasterizer(); + [[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept { + return texture_cache; + } + void Draw(bool is_indexed, u32 index_offset = 0); void DispatchDirect(); - void ScopeMarkerBegin(const std::string& str); + void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd(); + void ScopedMarkerInsert(const std::string_view& str); + void Breadcrumb(u64 id); + + void InvalidateMemory(VAddr addr, u64 size); + void MapMemory(VAddr addr, u64 size); + void UnmapMemory(VAddr addr, u64 size); + + u64 Flush(); private: - u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset); - void MapMemory(VAddr addr, size_t size); - void BeginRendering(); void UpdateDynamicState(const GraphicsPipeline& pipeline); @@ -49,11 +56,13 @@ private: private: const Instance& instance; Scheduler& scheduler; - VideoCore::TextureCache& texture_cache; + VideoCore::PageManager page_manager; + VideoCore::BufferCache buffer_cache; + VideoCore::TextureCache texture_cache; AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; PipelineCache pipeline_cache; - StreamBuffer vertex_index_buffer; + vk::UniqueEvent wfi_event; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 39dc2847d..a6c2536ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -2,12 +2,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "common/debug.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { +std::mutex Scheduler::submit_mutex; + Scheduler::Scheduler(const Instance& instance) : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} { profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); @@ -35,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, .pColorAttachments = render_state.color_attachments.data(), - .pDepthAttachment = - render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr, + .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); @@ -48,24 +50,78 @@ void Scheduler::EndRendering() { } is_rendering = false; current_cmdbuf.endRendering(); + + boost::container::static_vector barriers; + for (size_t i = 0; i < render_state.num_color_attachments; ++i) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, + .newLayout = vk::ImageLayout::eColorAttachmentOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = render_state.color_images[i], + .subresourceRange = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + if (render_state.has_depth) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = render_state.depth_attachment.imageLayout, + .newLayout = render_state.depth_attachment.imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = render_state.depth_image, + .subresourceRange = + { + .aspectMask = vk::ImageAspectFlagBits::eDepth | + (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil + : vk::ImageAspectFlagBits::eNone), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + + if (!barriers.empty()) { + const auto src_stages = + vk::PipelineStageFlagBits::eColorAttachmentOutput | + (render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eEarlyFragmentTests + : vk::PipelineStageFlagBits::eNone); + current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + } } -void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { - // When flushing, we only send data to the worker thread; no waiting is necessary. - SubmitExecution(signal, wait); +void Scheduler::Flush(SubmitInfo& info) { + // When flushing, we only send data to the driver; no waiting is necessary. + SubmitExecution(info); } -void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { +void Scheduler::Finish() { // When finishing, we need to wait for the submission to have executed on the device. const u64 presubmit_tick = CurrentTick(); - SubmitExecution(signal, wait); + SubmitInfo info{}; + SubmitExecution(info); Wait(presubmit_tick); } void Scheduler::Wait(u64 tick) { if (tick >= master_semaphore.CurrentTick()) { // Make sure we are not waiting for the current tick without signalling - Flush(); + SubmitInfo info{}; + Flush(info); } master_semaphore.Wait(tick); } @@ -86,7 +142,7 @@ void Scheduler::AllocateWorkerCommandBuffers() { } } -void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { +void Scheduler::SubmitExecution(SubmitInfo& info) { std::scoped_lock lk{submit_mutex}; const u64 signal_value = master_semaphore.NextTick(); @@ -97,7 +153,47 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa } EndRendering(); - master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value); + current_cmdbuf.end(); + + const vk::Semaphore timeline = master_semaphore.Handle(); + info.AddSignal(timeline, signal_value); + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::TimelineSemaphoreSubmitInfo timeline_si = { + .waitSemaphoreValueCount = static_cast(info.wait_ticks.size()), + .pWaitSemaphoreValues = info.wait_ticks.data(), + .signalSemaphoreValueCount = static_cast(info.signal_ticks.size()), + .pSignalSemaphoreValues = info.signal_ticks.data(), + }; + + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = static_cast(info.wait_semas.size()), + .pWaitSemaphores = info.wait_semas.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1U, + .pCommandBuffers = ¤t_cmdbuf, + .signalSemaphoreCount = static_cast(info.signal_semas.size()), + .pSignalSemaphores = info.signal_semas.data(), + }; + + try { + instance.GetGraphicsQueue().submit(submit_info, info.fence); + } catch (vk::DeviceLostError& err) { + if (instance.HasNvCheckpoints()) { + const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV(); + for (const auto& cp : checkpoint_data) { + LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage), + reinterpret_cast(cp.pCheckpointMarker)); + } + } + UNREACHABLE_MSG("Device lost during submit: {}", err.what()); + } + master_semaphore.Refresh(); AllocateWorkerCommandBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b45042743..1140bfbc2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,6 +6,7 @@ #include #include #include "common/types.h" +#include "common/unique_function.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -15,9 +16,12 @@ class Instance; struct RenderState { std::array color_attachments{}; + std::array color_images{}; vk::RenderingAttachmentInfo depth_attachment{}; + vk::Image depth_image{}; u32 num_color_attachments{}; - u32 num_depth_attachments{}; + bool has_depth{}; + bool has_stencil{}; u32 width = std::numeric_limits::max(); u32 height = std::numeric_limits::max(); @@ -26,16 +30,39 @@ struct RenderState { } }; +struct SubmitInfo { + boost::container::static_vector wait_semas; + boost::container::static_vector wait_ticks; + boost::container::static_vector signal_semas; + boost::container::static_vector signal_ticks; + vk::Fence fence; + + void AddWait(vk::Semaphore semaphore, u64 tick = 1) { + wait_semas.emplace_back(semaphore); + wait_ticks.emplace_back(tick); + } + + void AddSignal(vk::Semaphore semaphore, u64 tick = 1) { + signal_semas.emplace_back(semaphore); + signal_ticks.emplace_back(tick); + } + + void AddSignal(vk::Fence fence) { + this->fence = fence; + } +}; + class Scheduler { public: explicit Scheduler(const Instance& instance); ~Scheduler(); - /// Sends the current execution context to the GPU. - void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + /// Sends the current execution context to the GPU + /// and increments the scheduler timeline semaphore. + void Flush(SubmitInfo& info); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + void Finish(); /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick); @@ -72,16 +99,16 @@ public: } /// Defers an operation until the gpu has reached the current cpu tick. - void DeferOperation(auto&& func) { - pending_ops.emplace(func, CurrentTick()); + void DeferOperation(Common::UniqueFunction&& func) { + pending_ops.emplace(std::move(func), CurrentTick()); } - std::mutex submit_mutex; + static std::mutex submit_mutex; private: void AllocateWorkerCommandBuffers(); - void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); + void SubmitExecution(SubmitInfo& info); private: const Instance& instance; @@ -90,7 +117,7 @@ private: vk::CommandBuffer current_cmdbuf; std::condition_variable_any event_cv; struct PendingOp { - std::function callback; + Common::UniqueFunction callback; u64 gpu_tick; }; std::queue pending_ops; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp deleted file mode 100644 index 116f7896d..000000000 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ /dev/null @@ -1,241 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include "common/alignment.h" -#include "common/assert.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" - -namespace Vulkan { - -namespace { - -std::string_view BufferTypeName(BufferType type) { - switch (type) { - case BufferType::Upload: - return "Upload"; - case BufferType::Download: - return "Download"; - case BufferType::Stream: - return "Stream"; - default: - return "Invalid"; - } -} - -vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) { - switch (type) { - case BufferType::Upload: - return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; - case BufferType::Download: - return vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached; - case BufferType::Stream: - return vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent; - default: - UNREACHABLE_MSG("Unknown buffer type {}", static_cast(type)); - return vk::MemoryPropertyFlagBits::eHostVisible; - } -} - -static std::optional FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, - vk::MemoryPropertyFlags wanted) { - for (u32 i = 0; i < properties.memoryTypeCount; ++i) { - const auto flags = properties.memoryTypes[i].propertyFlags; - if ((flags & wanted) == wanted) { - return i; - } - } - return std::nullopt; -} - -/// Get the preferred host visible memory type. -u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) { - vk::MemoryPropertyFlags flags = MakePropertyFlags(type); - std::optional preferred_type = FindMemoryType(properties, flags); - - constexpr std::array remove_flags = { - vk::MemoryPropertyFlagBits::eHostCached, - vk::MemoryPropertyFlagBits::eHostCoherent, - }; - - for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) { - flags &= ~remove_flags[i]; - preferred_type = FindMemoryType(properties, flags); - } - ASSERT_MSG(preferred_type, "No suitable memory type found"); - return preferred_type.value(); -} - -constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; -constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; - -} // Anonymous namespace - -StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_, - vk::BufferUsageFlags usage_, u64 size, BufferType type_) - : instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()}, - stream_buffer_size{size}, usage{usage_}, type{type_} { - CreateBuffers(size); - ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); - ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); -} - -StreamBuffer::~StreamBuffer() { - device.unmapMemory(memory); - device.destroyBuffer(buffer); - device.freeMemory(memory); -} - -std::tuple StreamBuffer::Map(u64 size, u64 alignment) { - if (!is_coherent && type == BufferType::Stream) { - size = Common::AlignUp(size, instance.NonCoherentAtomSize()); - } - - ASSERT(size <= stream_buffer_size); - mapped_size = size; - - if (alignment > 0) { - offset = Common::AlignUp(offset, alignment); - } - - bool invalidate{false}; - if (offset + size > stream_buffer_size) { - // The buffer would overflow, save the amount of used watches and reset the state. - invalidate = true; - invalidation_mark = current_watch_cursor; - current_watch_cursor = 0; - offset = 0; - - // Swap watches and reset waiting cursors. - std::swap(previous_watches, current_watches); - wait_cursor = 0; - wait_bound = 0; - } - - const u64 mapped_upper_bound = offset + size; - WaitPendingOperations(mapped_upper_bound); - - return std::make_tuple(mapped + offset, offset, invalidate); -} - -void StreamBuffer::Commit(u64 size) { - if (!is_coherent && type == BufferType::Stream) { - size = Common::AlignUp(size, instance.NonCoherentAtomSize()); - } - - ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size, - size); - - const vk::MappedMemoryRange range = { - .memory = memory, - .offset = offset, - .size = size, - }; - - if (!is_coherent && type == BufferType::Download) { - device.invalidateMappedMemoryRanges(range); - } else if (!is_coherent) { - device.flushMappedMemoryRanges(range); - } - - offset += size; - - if (current_watch_cursor + 1 >= current_watches.size()) { - // Ensure that there are enough watches. - ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); - } - auto& watch = current_watches[current_watch_cursor++]; - watch.upper_bound = offset; - watch.tick = scheduler.CurrentTick(); -} - -void StreamBuffer::CreateBuffers(u64 prefered_size) { - const vk::Device device = instance.GetDevice(); - const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties(); - const u32 preferred_type = GetMemoryType(memory_properties, type); - const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type]; - const u32 preferred_heap = mem_type.heapIndex; - is_coherent = - static_cast(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent); - - // Substract from the preferred heap size some bytes to avoid getting out of memory. - const vk::DeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; - // As per DXVK's example, using `heap_size / 2` - const vk::DeviceSize allocable_size = heap_size / 2; - buffer = device.createBuffer({ - .size = std::min(prefered_size, allocable_size), - .usage = usage, - }); - - const auto requirements_chain = - device - .getBufferMemoryRequirements2( - {.buffer = buffer}); - - const auto& requirements = requirements_chain.get(); - const auto& dedicated_requirements = requirements_chain.get(); - - stream_buffer_size = static_cast(requirements.memoryRequirements.size); - - LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KiB with flags {}", - BufferTypeName(type), stream_buffer_size / 1024, - vk::to_string(mem_type.propertyFlags)); - - if (dedicated_requirements.prefersDedicatedAllocation) { - vk::StructureChain alloc_chain = - {}; - - auto& alloc_info = alloc_chain.get(); - alloc_info.allocationSize = requirements.memoryRequirements.size; - alloc_info.memoryTypeIndex = preferred_type; - - auto& dedicated_alloc_info = alloc_chain.get(); - dedicated_alloc_info.buffer = buffer; - - memory = device.allocateMemory(alloc_chain.get()); - } else { - memory = device.allocateMemory({ - .allocationSize = requirements.memoryRequirements.size, - .memoryTypeIndex = preferred_type, - }); - } - - device.bindBufferMemory(buffer, memory, 0); - mapped = reinterpret_cast(device.mapMemory(memory, 0, VK_WHOLE_SIZE)); - - if (instance.HasDebuggingToolAttached()) { - SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type), - stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); - SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", BufferTypeName(type), - stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); - } -} - -void StreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { - watches.resize(watches.size() + grow_size); -} - -void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { - if (!invalidation_mark) { - return; - } - while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { - auto& watch = previous_watches[wait_cursor]; - wait_bound = watch.upper_bound; - scheduler.Wait(watch.tick); - ++wait_cursor; - } -} - -u64 StreamBuffer::Copy(VAddr src, size_t size, size_t alignment /*= 0*/) { - const auto [data, offset, _] = Map(size, alignment); - std::memcpy(data, reinterpret_cast(src), size); - Commit(size); - return offset; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h deleted file mode 100644 index f7957ac08..000000000 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ /dev/null @@ -1,89 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include "common/types.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -enum class BufferType : u32 { - Upload = 0, - Download = 1, - Stream = 2, -}; - -class Instance; -class Scheduler; - -class StreamBuffer final { - static constexpr std::size_t MAX_BUFFER_VIEWS = 3; - -public: - explicit StreamBuffer(const Instance& instance, Scheduler& scheduler, - vk::BufferUsageFlags usage, u64 size, - BufferType type = BufferType::Stream); - ~StreamBuffer(); - - /** - * Reserves a region of memory from the stream buffer. - * @param size Size to reserve. - * @returns A pair of a raw memory pointer (with offset added), and the buffer offset - */ - std::tuple Map(u64 size, u64 alignment = 0); - - /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Commit(u64 size); - - /// Maps and commits a memory region with user provided data - u64 Copy(VAddr src, size_t size, size_t alignment = 0); - - vk::Buffer Handle() const noexcept { - return buffer; - } - -private: - struct Watch { - u64 tick{}; - u64 upper_bound{}; - }; - - /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(u64 prefered_size); - - /// Increases the amount of watches available. - void ReserveWatches(std::vector& watches, std::size_t grow_size); - - void WaitPendingOperations(u64 requested_upper_bound); - -private: - const Instance& instance; ///< Vulkan instance. - Scheduler& scheduler; ///< Command scheduler. - - vk::Device device; - vk::Buffer buffer; ///< Mapped buffer. - vk::DeviceMemory memory; ///< Memory allocation. - u8* mapped{}; ///< Pointer to the mapped memory - u64 stream_buffer_size{}; ///< Stream buffer size. - vk::BufferUsageFlags usage{}; - BufferType type; - - u64 offset{}; ///< Buffer iterator. - u64 mapped_size{}; ///< Size reserved for the current copy. - bool is_coherent{}; ///< True if the buffer is coherent - - std::vector current_watches; ///< Watches recorded in the current iteration. - std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. - std::optional invalidation_mark; ///< Number of watches used in the previous cycle. - - std::vector previous_watches; ///< Watches used in the previous iteration. - std::size_t wait_cursor{}; ///< Last watch being waited for completion. - u64 wait_bound{}; ///< Highest offset being watched for completion. -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 7fffdeb2c..dcc19bf3b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -37,6 +37,16 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { instance.GetPresentQueueFamilyIndex(), }; + const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const auto find_mode = [&modes](vk::PresentModeKHR requested) { + const auto it = + std::find_if(modes.begin(), modes.end(), + [&requested](vk::PresentModeKHR mode) { return mode == requested; }); + + return it != modes.end(); + }; + const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox); + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; const u32 queue_family_indices_count = exclusive ? 1u : 2u; const vk::SharingMode sharing_mode = @@ -55,7 +65,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, .compositeAlpha = composite_alpha, - .presentMode = vk::PresentModeKHR::eFifo, + .presentMode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate, .clipped = true, .oldSwapchain = nullptr, }; @@ -83,6 +93,7 @@ bool Swapchain::AcquireNextImage() { case vk::Result::eSuboptimalKHR: case vk::Result::eErrorSurfaceLostKHR: case vk::Result::eErrorOutOfDateKHR: + case vk::Result::eErrorUnknown: needs_recreation = true; break; default: diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b4b3f48a7..bae4b89d4 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -116,19 +117,17 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); + // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case + // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 && - info.size.width == info.size.height) { + if (info.props.is_cube) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; - } - if (info.type == vk::ImageType::e3D) { + } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } - if (info.IsBlockCoded()) { - flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; - } usage = ImageUsageFlags(info); @@ -158,14 +157,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, image.Create(image_ci); - // Create a special view for detiler - if (info.is_tiled) { - ImageViewInfo view_info; - view_info.format = DemoteImageFormatForDetiling(info.pixel_format); - view_for_detiler.emplace(*instance, view_info, *this, ImageId{}); - } - - Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); + Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}", + info.guest_address, info.guest_size_bytes); } void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 97ceaa098..5a888346f 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -105,13 +105,13 @@ struct Image { VAddr cpu_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; - std::optional view_for_detiler; // Resource state tracking vk::ImageUsageFlags usage; vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; vk::Flags access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; + boost::container::small_vector mip_hashes; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 41ad09381..17b78a6d5 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -47,33 +47,33 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { // clang-format off // The table of macro tiles parameters for given tiling index (row) and bpp (column) static constexpr std::array macro_tile_extents{ - std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 07 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0B - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0C - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 10 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 11 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 12 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 - std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 - std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 - std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 - std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{128u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A }; // clang-format on @@ -82,62 +82,65 @@ static constexpr auto hw_pipe_interleave = 256u; static constexpr std::pair GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) { ASSERT(num_samples == 1); - const auto row = tiling_idx * 4; - const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64 + const auto row = tiling_idx * 5; + const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128 return macro_tile_extents[row + column]; } -static constexpr size_t ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, u32 num_samples) { +static constexpr std::pair ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, + u32 num_samples) { const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8)); auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = height; - size_t log_sz = 1; - const auto slice_align = std::max(64u, hw_pipe_interleave / (bpp + 7) / 8); + size_t log_sz = pitch_aligned * height_aligned * num_samples; + const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8)); while (log_sz % slice_align) { - log_sz = pitch_aligned * height_aligned * num_samples; pitch_aligned += pitch_align; + log_sz = pitch_aligned * height_aligned * num_samples; } - return (log_sz * bpp + 7) / 8; + return {pitch_aligned, (log_sz * bpp + 7) / 8}; } -static constexpr size_t ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples) { +static constexpr std::pair ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, + u32 num_samples) { const auto& [pitch_align, height_align] = micro_tile_extent; auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); - size_t log_sz = 1; + size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; while (log_sz % 256) { - log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; pitch_aligned += 8; + log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; } - return log_sz; + return {pitch_aligned, log_sz}; } -static constexpr size_t ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples, - u32 tiling_idx) { +static constexpr std::pair ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, + u32 num_samples, u32 tiling_idx) { const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples); ASSERT(pitch_align != 0 && height_align != 0); const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); - return (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; + const auto log_sz = pitch_aligned * height_aligned * num_samples; + return {pitch_aligned, (log_sz * bpp + 7) / 8}; } ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept { const auto& attrib = group.attrib; - is_tiled = attrib.tiling_mode == TilingMode::Tile; - tiling_mode = - is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear; + props.is_tiled = attrib.tiling_mode == TilingMode::Tile; + tiling_mode = props.is_tiled ? AmdGpu::TilingMode::Display_MacroTiled + : AmdGpu::TilingMode::Display_Linear; pixel_format = ConvertPixelFormat(attrib.pixel_format); type = vk::ImageType::e2D; size.width = attrib.width; size.height = attrib.height; pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127); usage.vo_buffer = true; - const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float; - ASSERT(is_32bpp); + num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64; + ASSERT(num_bits == 32); guest_address = cpu_address; - if (!is_tiled) { + if (!props.is_tiled) { guest_size_bytes = pitch * size.height * 4; } else { if (Config::isNeoMode()) { @@ -146,15 +149,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4; } } - mips_layout.emplace_back(0, guest_size_bytes); + mips_layout.emplace_back(guest_size_bytes, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { - is_tiled = buffer.IsTiled(); + props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); num_samples = 1 << buffer.attrib.num_fragments_log2; + num_bits = NumBits(buffer.info.format); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); @@ -168,15 +172,16 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, guest_address = buffer.Address(); const auto color_slice_sz = buffer.GetColorSliceSize(); guest_size_bytes = color_slice_sz * buffer.NumSlices(); - mips_layout.emplace_back(0, color_slice_sz); + mips_layout.emplace_back(color_slice_sz, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { - is_tiled = false; + props.is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = vk::ImageType::e2D; num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 + num_bits = buffer.NumBits(); size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); size.depth = 1; @@ -184,41 +189,44 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; usage.depth_target = true; + usage.stencil = + buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size_bytes = depth_slice_sz * num_slices; - mips_layout.emplace_back(0, depth_slice_sz); + mips_layout.emplace_back(depth_slice_sz, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { - is_tiled = image.IsTiled(); tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); type = ConvertImageType(image.GetType()); - is_cube = image.GetType() == AmdGpu::ImageType::Cube; - is_volume = image.GetType() == AmdGpu::ImageType::Color3D; + props.is_tiled = image.IsTiled(); + props.is_cube = image.GetType() == AmdGpu::ImageType::Cube; + props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D; + props.is_pow2 = image.pow2pad; + props.is_block = IsBlockCoded(); size.width = image.width + 1; size.height = image.height + 1; - size.depth = is_volume ? image.depth + 1 : 1; + size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); resources.layers = image.NumLayers(); + num_bits = NumBits(image.GetDataFmt()); usage.texture = true; guest_address = image.Address(); mips_layout.reserve(resources.levels); - const auto num_bits = NumBits(image.GetDataFmt()); - const auto is_block = IsBlockCoded(); - const auto is_pow2 = image.pow2pad; + MipInfo mip_info{}; guest_size_bytes = 0; for (auto mip = 0u; mip < resources.levels; ++mip) { auto bpp = num_bits; auto mip_w = pitch >> mip; auto mip_h = size.height >> mip; - if (is_block) { + if (props.is_block) { mip_w = (mip_w + 3) / 4; mip_h = (mip_h + 3) / 4; bpp *= 16; @@ -227,40 +235,48 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { mip_h = std::max(mip_h, 1u); auto mip_d = std::max(size.depth >> mip, 1u); - if (is_pow2) { + if (props.is_pow2) { mip_w = std::bit_ceil(mip_w); mip_h = std::bit_ceil(mip_h); mip_d = std::bit_ceil(mip_d); } - size_t mip_size = 0; switch (tiling_mode) { case AmdGpu::TilingMode::Display_Linear: { - ASSERT(!is_cube); - mip_size = ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples); + ASSERT(!props.is_cube); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples); + mip_info.height = mip_h; break; } case AmdGpu::TilingMode::Texture_MicroTiled: { - mip_size = ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); + mip_info.height = std::max(mip_h, 8u); + if (props.is_block) { + mip_info.pitch = std::max(mip_info.pitch * 4, 32u); + mip_info.height = std::max(mip_info.height * 4, 32u); + } break; } case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { - ASSERT(!is_cube && !is_block); + ASSERT(!props.is_block); ASSERT(num_samples == 1); - ASSERT(num_bits <= 64); - mip_size = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); break; } default: { UNREACHABLE(); } } - mip_size *= mip_d; + mip_info.size *= mip_d; - mips_layout.emplace_back(guest_size_bytes, mip_size); - guest_size_bytes += mip_size; + mip_info.offset = guest_size_bytes; + mips_layout.emplace_back(mip_info); + guest_size_bytes += mip_info.size; } guest_size_bytes *= resources.layers; } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index b98410b90..9dad0dd67 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -9,6 +9,8 @@ #include "video_core/amdgpu/liverpool.h" #include "video_core/texture_cache/types.h" +#include + namespace VideoCore { struct ImageInfo { @@ -42,18 +44,29 @@ struct ImageInfo { u32 vo_buffer : 1; } usage{}; // Usage data tracked during image lifetime - bool is_cube = false; - bool is_volume = false; - bool is_tiled = false; - bool is_read_only = false; + struct { + u32 is_cube : 1; + u32 is_volume : 1; + u32 is_tiled : 1; + u32 is_pow2 : 1; + u32 is_block : 1; + } props{}; // Surface properties with impact on various calculation factors + vk::Format pixel_format = vk::Format::eUndefined; vk::ImageType type = vk::ImageType::e1D; SubresourceExtent resources; Extent3D size{1, 1, 1}; + u32 num_bits{}; u32 num_samples = 1; u32 pitch = 0; AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear}; - std::vector> mips_layout; + struct MipInfo { + u32 size; + u32 pitch; + u32 height; + u32 offset; + }; + boost::container::small_vector mips_layout; VAddr guest_address{0}; u32 guest_size_bytes{0}; }; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 8f9722531..cbf77f2d8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/image.h" @@ -46,30 +47,67 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept - : is_storage{is_storage} { +bool IsIdentityMapping(u32 dst_sel, u32 num_components) { + return (num_components == 1 && dst_sel == 0b100) || + (num_components == 2 && dst_sel == 0b101'100) || + (num_components == 3 && dst_sel == 0b110'101'100) || + (num_components == 4 && dst_sel == 0b111'110'101'100); +} + +vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { + if (format == vk::Format::eR8G8B8A8Unorm && dst_sel == 0b111100101110) { + return vk::Format::eB8G8R8A8Unorm; + } + return format; +} + +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept + : is_storage{is_storage_} { type = ConvertImageViewType(image.GetType()); format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); - range.base.level = static_cast(image.base_level); - range.base.layer = static_cast(image.base_array); - range.extent.levels = image.NumLevels(); - range.extent.layers = image.NumLayers(); + range.base.level = image.base_level; + range.base.layer = image.base_array; + range.extent.levels = image.last_level + 1; + range.extent.layers = image.last_array + 1; if (!is_storage) { mapping.r = ConvertComponentSwizzle(image.dst_sel_x); mapping.g = ConvertComponentSwizzle(image.dst_sel_y); mapping.b = ConvertComponentSwizzle(image.dst_sel_z); mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } + // Check for unfortunate case of storage images being swizzled + const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt()); + const u32 dst_sel = image.DstSelect(); + if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) { + if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) { + format = new_format; + return; + } + LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps, + image.DstSelectName()); + } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept { const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); + range.base.layer = col_buffer.view.slice_start; + range.extent.layers = col_buffer.NumSlices(); format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } +ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, + AmdGpu::Liverpool::DepthView view, + AmdGpu::Liverpool::DepthControl ctl) { + format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format, + depth_buffer.stencil_info.format); + is_storage = ctl.depth_write_enable; + range.base.layer = view.slice_start; + range.extent.layers = view.NumSlices(); +} + ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, ImageId image_id_, std::optional usage_override /*= {}*/) : info{info_}, image_id{image_id_} { @@ -93,10 +131,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .components = instance.GetSupportedComponentSwizzle(format, info.mapping), .subresourceRange{ .aspectMask = aspect, - .baseMipLevel = 0U, - .levelCount = 1, + .baseMipLevel = info.range.base.level, + .levelCount = info.range.extent.levels - info.range.base.level, .baseArrayLayer = info_.range.base.layer, - .layerCount = image.info.IsBlockCoded() ? 1 : VK_REMAINING_ARRAY_LAYERS, + .layerCount = info.range.extent.layers - info.range.base.layer, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index b43f65dee..fbc62db36 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -18,10 +18,11 @@ class Scheduler; namespace VideoCore { struct ImageViewInfo { - explicit ImageViewInfo() = default; - explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; - explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, - bool is_vo_surface) noexcept; + ImageViewInfo() = default; + ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; + ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, + AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; @@ -34,6 +35,8 @@ struct ImageViewInfo { struct Image; +constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0}; + struct ImageView { explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, ImageId image_id, std::optional usage_override = {}); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 55bb99cca..6bc893b09 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -3,103 +3,21 @@ #include #include "common/assert.h" -#include "common/config.h" -#include "core/virtual_memory.h" +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" -#ifndef _WIN64 -#include -#include - -#define PAGE_NOACCESS PROT_NONE -#define PAGE_READWRITE (PROT_READ | PROT_WRITE) -#define PAGE_READONLY PROT_READ -#else -#include - -void mprotect(void* addr, size_t len, int prot) { - DWORD old_prot{}; - BOOL result = VirtualProtect(addr, len, prot, &old_prot); - ASSERT_MSG(result != 0, "Region protection failed"); -} - -#endif - namespace VideoCore { -static TextureCache* g_texture_cache = nullptr; - -#ifndef _WIN64 -void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) { - ucontext_t* ctx = reinterpret_cast(raw_context); - const VAddr address = reinterpret_cast(info->si_addr); - -#ifdef __APPLE__ - const u32 err = ctx->uc_mcontext->__es.__err; -#else - const greg_t err = ctx->uc_mcontext.gregs[REG_ERR]; -#endif - - if (err & 0x2) { - g_texture_cache->OnCpuWrite(address); - } else { - // Read not supported! - UNREACHABLE(); - } -} -#else -LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept { - const u32 ec = pExp->ExceptionRecord->ExceptionCode; - if (ec == EXCEPTION_ACCESS_VIOLATION) { - const auto info = pExp->ExceptionRecord->ExceptionInformation; - if (info[0] == 1) { // Write violation - g_texture_cache->OnCpuWrite(info[1]); - return EXCEPTION_CONTINUE_EXECUTION; - } /* else { - UNREACHABLE(); - }*/ - } - return EXCEPTION_CONTINUE_SEARCH; // pass further -} -#endif - -static constexpr u64 StreamBufferSize = 512_MB; static constexpr u64 PageShift = 12; -TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_) - : instance{instance_}, scheduler{scheduler_}, - staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize, - Vulkan::BufferType::Upload}, +TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, + BufferCache& buffer_cache_, PageManager& tracker_) + : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { - -#ifndef _WIN64 -#ifdef __APPLE__ - // Read-only memory write results in SIGBUS on Apple. - static constexpr int SignalType = SIGBUS; -#else - static constexpr int SignalType = SIGSEGV; -#endif - - sigset_t signal_mask; - sigemptyset(&signal_mask); - sigaddset(&signal_mask, SignalType); - - using HandlerType = decltype(sigaction::sa_sigaction); - - struct sigaction guest_access_fault {}; - guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK; - guest_access_fault.sa_sigaction = &GuestFaultSignalHandler; - guest_access_fault.sa_mask = signal_mask; - sigaction(SignalType, &guest_access_fault, nullptr); -#else - veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler); - ASSERT_MSG(veh_handle, "Failed to register an exception handler"); -#endif - g_texture_cache = this; - ImageInfo info; info.pixel_format = vk::Format::eR8G8B8A8Unorm; info.type = vk::ImageType::e2D; @@ -110,15 +28,14 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id)); } -TextureCache::~TextureCache() { -#if _WIN64 - RemoveVectoredExceptionHandler(veh_handle); -#endif -} +TextureCache::~TextureCache() = default; -void TextureCache::OnCpuWrite(VAddr address) { - std::unique_lock lock{m_page_table}; - ForEachImageInRegion(address, 1 << PageShift, [&](ImageId image_id, Image& image) { +void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) { + std::unique_lock lock{mutex}; + ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { + if (from_compute && !image.Overlaps(address, size)) { + return; + } // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -126,8 +43,28 @@ void TextureCache::OnCpuWrite(VAddr address) { }); } -ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { - std::unique_lock lock{m_page_table}; +void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { + std::scoped_lock lk{mutex}; + + boost::container::small_vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + // TODO: Download image data back to host. + UnregisterImage(id); + DeleteImage(id); + } +} + +ImageId TextureCache::FindImage(const ImageInfo& info) { + if (info.guest_address == 0) [[unlikely]] { + return NULL_IMAGE_VIEW_ID; + } + + std::unique_lock lock{mutex}; boost::container::small_vector image_ids; ForEachImageInRegion( info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { @@ -142,22 +79,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { image_ids.push_back(image_id); }); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); + // ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); ImageId image_id{}; if (image_ids.empty()) { image_id = slot_images.insert(instance, scheduler, info); RegisterImage(image_id); } else { - image_id = image_ids[0]; - } - - RegisterMeta(info, image_id); - - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { - RefreshImage(image); - TrackImage(image, image_id); + image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } return image_id; @@ -184,14 +113,15 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) { - const ImageInfo info{desc}; +ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); + UpdateImage(image_id); Image& image = slot_images[image_id]; auto& usage = image.info.usage; - if (is_storage) { - image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); + if (view_info.is_storage) { + image.Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); usage.storage = true; } else { const auto new_layout = image.info.IsDepthStencil() @@ -201,110 +131,167 @@ ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) usage.texture = true; } - const ImageViewInfo view_info{desc, is_storage}; - return RegisterImageView(image_id, view_info); + // These changes are temporary and should be removed once texture cache will handle subresources + // merging + auto view_info_tmp = view_info; + if (view_info_tmp.range.base.level > image.info.resources.levels - 1 || + view_info_tmp.range.base.layer > image.info.resources.layers - 1 || + view_info_tmp.range.extent.levels > image.info.resources.levels || + view_info_tmp.range.extent.layers > image.info.resources.layers) { + + LOG_DEBUG(Render_Vulkan, + "Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})", + view_info_tmp.range.base.level, view_info_tmp.range.extent.levels, + view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers, + image.info.resources.levels, image.info.resources.layers); + + view_info_tmp.range.base.level = + std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1); + view_info_tmp.range.base.layer = + std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1); + view_info_tmp.range.extent.levels = + std::min(view_info_tmp.range.extent.levels, image.info.resources.levels); + view_info_tmp.range.extent.layers = + std::min(view_info_tmp.range.extent.layers, image.info.resources.layers); + } + + return RegisterImageView(image_id, view_info_tmp); } -ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint) { - const ImageInfo info{buffer, hint}; - const ImageId image_id = FindImage(info); +ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info) { + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; - image.flags &= ~ImageFlagBits::CpuModified; + image.flags |= ImageFlagBits::GpuModified; + UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eColorAttachmentRead); + // Register meta data for this color buffer + if (!(image.flags & ImageFlagBits::MetaRegistered)) { + if (image_info.meta_info.cmask_addr) { + surface_metas.emplace( + image_info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); + image.info.meta_info.cmask_addr = image_info.meta_info.cmask_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + + if (image_info.meta_info.fmask_addr) { + surface_metas.emplace( + image_info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); + image.info.meta_info.fmask_addr = image_info.meta_info.fmask_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + } + + // Update tracked image usage image.info.usage.render_target = true; - ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer}; return RegisterImageView(image_id, view_info); } -ImageView& TextureCache::FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled) { - const ImageInfo info{buffer, num_slices, htile_address, hint}; - const ImageId image_id = FindImage(info, false); +ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info) { + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; + image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::CpuModified; - const auto new_layout = write_enabled ? vk::ImageLayout::eDepthStencilAttachmentOptimal - : vk::ImageLayout::eDepthStencilReadOnlyOptimal; + const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal + : vk::ImageLayout::eDepthStencilReadOnlyOptimal; image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentRead); + // Register meta data for this depth buffer + if (!(image.flags & ImageFlagBits::MetaRegistered)) { + if (image_info.meta_info.htile_addr) { + surface_metas.emplace( + image_info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); + image.info.meta_info.htile_addr = image_info.meta_info.htile_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + } + + // Update tracked image usage image.info.usage.depth_target = true; - ImageViewInfo view_info; - view_info.format = info.pixel_format; return RegisterImageView(image_id, view_info); } -void TextureCache::RefreshImage(Image& image) { +void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - { - if (!tile_manager.TryDetile(image)) { - // Upload data to the staging buffer. - const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4); - // Copy to the image. - image.Upload(staging.Handle(), offset); - } + const auto& num_layers = image.info.resources.layers; + const auto& num_mips = image.info.resources.levels; + ASSERT(num_mips == image.info.mips_layout.size()); - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); - return; - } - - ASSERT(image.info.resources.levels == image.info.mips_layout.size()); - const u8* image_data = reinterpret_cast(image.cpu_addr); - for (u32 m = 0; m < image.info.resources.levels; m++) { + boost::container::small_vector image_copy{}; + for (u32 m = 0; m < num_mips; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = image.info.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const u32 map_size = image.info.mips_layout[m].second * image.info.resources.layers; + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - // Upload data to the staging buffer. - const auto [data, offset, _] = staging.Map(map_size, 16); - if (image.info.is_tiled) { - ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); - } else { - std::memcpy(data, - image_data + image.info.mips_layout[m].first * image.info.resources.layers, - map_size); + // Protect GPU modified resources from accidental reuploads. + if (True(image.flags & ImageFlagBits::GpuModified) && + !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { + const u8* addr = std::bit_cast(image.info.guest_address); + const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); + if (image.mip_hashes[m] == hash) { + continue; + } + image.mip_hashes[m] = hash; } - staging.Commit(map_size); - // Copy to the image. - const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, + image_copy.push_back({ + .bufferOffset = mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), .imageSubresource{ .aspectMask = vk::ImageAspectFlagBits::eColor, .mipLevel = m, .baseArrayLayer = 0, - .layerCount = u32(image.info.resources.layers), + .layerCount = num_layers, }, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, - }; - - scheduler.EndRendering(); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); - - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + }); } + + if (image_copy.empty()) { + return; + } + + auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler; + sched_ptr->EndRendering(); + + const auto cmdbuf = sched_ptr->CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + + const VAddr image_addr = image.info.guest_address; + const size_t image_size = image.info.guest_size_bytes; + vk::Buffer buffer{}; + u32 offset{}; + if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) { + buffer = *upload_buffer; + } else { + const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); + buffer = vk_buffer->Handle(); + offset = buf_offset; + } + + for (auto& copy : image_copy) { + copy.bufferOffset += offset; + } + + cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); } vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { @@ -322,47 +309,6 @@ void TextureCache::RegisterImage(ImageId image_id) { [this, image_id](u64 page) { page_table[page].push_back(image_id); }); } -void TextureCache::RegisterMeta(const ImageInfo& info, ImageId image_id) { - Image& image = slot_images[image_id]; - - if (image.flags & ImageFlagBits::MetaRegistered) { - return; - } - - bool registered = true; - // Current resource tracking implementation allows us to detect usage of meta only in the last - // moment, so we likely will miss its first clear. To avoid this and make first frame, where - // the meta is encountered, looks correct we set its state to "cleared" at registrations time. - if (info.usage.render_target) { - if (info.meta_info.cmask_addr) { - surface_metas.emplace( - info.meta_info.cmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); - image.info.meta_info.cmask_addr = info.meta_info.cmask_addr; - } - - if (info.meta_info.fmask_addr) { - surface_metas.emplace( - info.meta_info.fmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); - image.info.meta_info.fmask_addr = info.meta_info.fmask_addr; - } - } else if (info.usage.depth_target) { - if (info.meta_info.htile_addr) { - surface_metas.emplace( - info.meta_info.htile_addr, - MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); - image.info.meta_info.htile_addr = info.meta_info.htile_addr; - } - } else { - registered = false; - } - - if (registered) { - image.flags |= ImageFlagBits::MetaRegistered; - } -} - void TextureCache::UnregisterImage(ImageId image_id) { Image& image = slot_images[image_id]; ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), @@ -370,11 +316,11 @@ void TextureCache::UnregisterImage(ImageId image_id) { image.flags &= ~ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); + if (page_it == nullptr) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift); return; } - auto& image_ids = page_it.value(); + auto& image_ids = *page_it; const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); @@ -390,7 +336,7 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) { return; } image.flags |= ImageFlagBits::Tracked; - UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1); + tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1); } void TextureCache::UntrackImage(Image& image, ImageId image_id) { @@ -398,40 +344,34 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) { return; } image.flags &= ~ImageFlagBits::Tracked; - UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1); + tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1); } -void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { - std::scoped_lock lk{mutex}; - const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; - const u64 page_start = addr >> PageShift; - const u64 page_end = page_start + num_pages; +void TextureCache::DeleteImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - const auto pages_interval = - decltype(cached_pages)::interval_type::right_open(page_start, page_end); - if (delta > 0) { - cached_pages.add({pages_interval, delta}); + // Remove any registered meta areas. + const auto& meta_info = image.info.meta_info; + if (meta_info.cmask_addr) { + surface_metas.erase(meta_info.cmask_addr); + } + if (meta_info.fmask_addr) { + surface_metas.erase(meta_info.fmask_addr); + } + if (meta_info.htile_addr) { + surface_metas.erase(meta_info.htile_addr); } - const auto& range = cached_pages.equal_range(pages_interval); - for (const auto& [range, count] : boost::make_iterator_range(range)) { - const auto interval = range & pages_interval; - const VAddr interval_start_addr = boost::icl::first(interval) << PageShift; - const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; - const u32 interval_size = interval_end_addr - interval_start_addr; - void* addr = reinterpret_cast(interval_start_addr); - if (delta > 0 && count == delta) { - mprotect(addr, interval_size, PAGE_READONLY); - } else if (delta < 0 && count == -delta) { - mprotect(addr, interval_size, PAGE_READWRITE); - } else { - ASSERT(count >= 0); + // Reclaim image and any image views it references. + scheduler.DeferOperation([this, image_id] { + Image& image = slot_images[image_id]; + for (const ImageViewId image_view_id : image.image_view_ids) { + slot_image_views.erase(image_view_id); } - } - - if (delta < 0) { - cached_pages.add({pages_interval, delta}); - } + slot_images.erase(image_id); + }); } } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8a6189835..137b60141 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,12 +4,11 @@ #pragma once #include -#include #include #include "common/slot_vector.h" #include "video_core/amdgpu/resource.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/multi_level_page_table.h" #include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/sampler.h" @@ -21,50 +20,56 @@ struct BufferAttributeGroup; namespace VideoCore { +class BufferCache; +class PageManager; + class TextureCache { - // This is the page shift for adding images into the hash map. It isn't related to - // the page size of the guest or the host and is chosen for convenience. A number too - // small will increase the number of hash map lookups per image, while too large will - // increase the number of images per page. - static constexpr u64 PageBits = 20; - static constexpr u64 PageMask = (1ULL << PageBits) - 1; - - struct MetaDataInfo { - enum class Type { - CMask, - FMask, - HTile, - }; - - Type type; - bool is_cleared; + struct Traits { + using Entry = boost::container::small_vector; + static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t FirstLevelBits = 9; + static constexpr size_t PageBits = 22; }; + using PageTable = MultiLevelPageTable; public: - explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); + explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + BufferCache& buffer_cache, PageManager& tracker); ~TextureCache(); /// Invalidates any image in the logical page range. - void OnCpuWrite(VAddr address); + void InvalidateMemory(VAddr address, size_t size, bool from_compute = false); + + /// Evicts any images that overlap the unmapped range. + void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); + [[nodiscard]] ImageId FindImage(const ImageInfo& info); /// Retrieves an image view with the properties of the specified image descriptor. - [[nodiscard]] ImageView& FindTexture(const AmdGpu::Image& image, bool is_storage); + [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, + const ImageViewInfo& view_info); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint); + [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info); /// Retrieves the depth target with specified properties - [[nodiscard]] ImageView& FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled); + [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info); + + /// Updates image contents if it was modified by CPU. + void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::CpuModified)) { + return; + } + RefreshImage(image, custom_scheduler); + TrackImage(image, image_id); + } /// Reuploads image contents. - void RefreshImage(Image& image); + void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); @@ -102,8 +107,8 @@ private: template static void ForEachPage(PAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PageBits; - for (u64 page = addr >> PageBits; page <= page_end; ++page) { + const u64 page_end = (addr + size - 1) >> Traits::PageBits; + for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; @@ -121,14 +126,14 @@ private: boost::container::small_vector images; ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); - if (it == page_table.end()) { + if (it == nullptr) { if constexpr (BOOL_BREAK) { return false; } else { return; } } - for (const ImageId image_id : it->second) { + for (const ImageId image_id : *it) { Image& image = slot_images[image_id]; if (image.flags & ImageFlagBits::Picked) { continue; @@ -158,9 +163,6 @@ private: /// Register image in the page table void RegisterImage(ImageId image); - /// Register metadata surfaces attached to the image - void RegisterMeta(const ImageInfo& info, ImageId image); - /// Unregister image from the page table void UnregisterImage(ImageId image); @@ -170,25 +172,31 @@ private: /// Stop tracking CPU reads and writes for image void UntrackImage(Image& image, ImageId image_id); - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta); + /// Removes the image and any views/surface metas that reference it. + void DeleteImage(ImageId image_id); private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; - Vulkan::StreamBuffer staging; + BufferCache& buffer_cache; + PageManager& tracker; TileManager tile_manager; Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; - tsl::robin_pg_map> page_table; - boost::icl::interval_map cached_pages; - tsl::robin_map surface_metas; + PageTable page_table; std::mutex mutex; -#ifdef _WIN64 - void* veh_handle{}; -#endif - std::mutex m_page_table; + + struct MetaDataInfo { + enum class Type { + CMask, + FMask, + HTile, + }; + Type type; + bool is_cleared; + }; + tsl::robin_map surface_metas; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index e097ba3ed..f08f2094c 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -5,7 +5,6 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image_view.h" -#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" #include "video_core/host_shaders/detile_m32x1_comp.h" @@ -16,6 +15,7 @@ #include #include +#include namespace VideoCore { @@ -176,17 +176,26 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { return vk::Format::eR8Uint; case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: + case vk::Format::eR16Unorm: return vk::Format::eR8G8Uint; case vk::Format::eR8G8B8A8Srgb: case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Unorm: case vk::Format::eR8G8B8A8Unorm: + case vk::Format::eR8G8B8A8Uint: case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: + case vk::Format::eR16G16Sfloat: + case vk::Format::eR16G16Unorm: return vk::Format::eR32Uint; + case vk::Format::eBc1RgbaSrgbBlock: case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc4UnormBlock: case vk::Format::eR32G32Sfloat: + case vk::Format::eR32G32Uint: + case vk::Format::eR16G16B16A16Unorm: + case vk::Format::eR16G16B16A16Uint: + case vk::Format::eR16G16B16A16Sfloat: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2UnormBlock: @@ -195,11 +204,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eBc5UnormBlock: case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: + case vk::Format::eBc6HUfloatBlock: + case vk::Format::eR32G32B32A32Sfloat: return vk::Format::eR32G32B32A32Uint; default: break; } - LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format)); + + // Log missing formats only once to avoid spamming the log. + static constexpr size_t MaxFormatIndex = 256; + static std::array logged_formats{}; + if (const u32 index = u32(format); !logged_formats[index]) { + LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format)); + logged_formats[index] = true; + } return format; } @@ -225,14 +243,17 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const { return nullptr; } -static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer | - vk::BufferUsageFlagBits::eStorageBuffer; +struct DetilerParams { + u32 num_levels; + u32 pitch0; + u32 sizes[14]; +}; + +static constexpr size_t StreamBufferSize = 128_MB; TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) : instance{instance}, scheduler{scheduler}, - staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} { - + stream_buffer{instance, scheduler, MemoryUsage::Stream, StreamBufferSize} { static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, @@ -264,7 +285,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc }, { .binding = 1, - .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }, @@ -281,7 +302,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc const vk::PushConstantRange push_constants = { .stageFlags = vk::ShaderStageFlagBits::eCompute, .offset = 0, - .size = sizeof(u32), + .size = sizeof(DetilerParams), }; const vk::DescriptorSetLayout set_layout = *desc_layout; @@ -312,35 +333,101 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc TileManager::~TileManager() = default; -bool TileManager::TryDetile(Image& image) { - if (!image.info.is_tiled) { - return false; +TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*= false*/) { + const auto usage = vk::BufferUsageFlagBits::eStorageBuffer | + (is_storage ? vk::BufferUsageFlagBits::eTransferSrc + : vk::BufferUsageFlagBits::eTransferDst); + const vk::BufferCreateInfo buffer_ci{ + .size = size, + .usage = usage, + }; + +#ifdef __APPLE__ + // Fix for detiler artifacts on macOS + const bool is_large_buffer = true; +#else + const bool is_large_buffer = size > 128_MB; +#endif + VmaAllocationCreateInfo alloc_info{ + .flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + : static_cast(0), + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = !is_storage ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + : static_cast(0), + }; + + VkBuffer buffer; + VmaAllocation allocation; + const auto buffer_ci_unsafe = static_cast(buffer_ci); + const auto result = vmaCreateBuffer(instance.GetAllocator(), &buffer_ci_unsafe, &alloc_info, + &buffer, &allocation, nullptr); + ASSERT(result == VK_SUCCESS); + return {buffer, allocation}; +} + +void TileManager::Upload(ScratchBuffer buffer, const void* data, size_t size) { + VmaAllocationInfo alloc_info{}; + vmaGetAllocationInfo(instance.GetAllocator(), buffer.second, &alloc_info); + ASSERT(size <= alloc_info.size); + void* ptr{}; + const auto result = vmaMapMemory(instance.GetAllocator(), buffer.second, &ptr); + ASSERT(result == VK_SUCCESS); + std::memcpy(ptr, data, size); + vmaUnmapMemory(instance.GetAllocator(), buffer.second); +} + +void TileManager::FreeBuffer(ScratchBuffer buffer) { + vmaDestroyBuffer(instance.GetAllocator(), buffer.first, buffer.second); +} + +std::optional TileManager::TryDetile(Image& image) { + if (!image.info.props.is_tiled) { + return std::nullopt; } const auto* detiler = GetDetiler(image); if (!detiler) { - LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", - vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); - return false; + if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled) { + LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", + vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); + } + return std::nullopt; } - const auto offset = - staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment()); - image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); + // Prepare input buffer + const u32 image_size = image.info.guest_size_bytes; + const auto [in_buffer, in_offset] = [&] -> std::pair { + // Use stream buffer for smaller textures. + if (image_size <= stream_buffer.GetFreeSize()) { + u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); + return {stream_buffer.Handle(), offset}; + } + // Request temporary host buffer for larger sizes. + auto in_buffer = AllocBuffer(image_size); + const auto addr = reinterpret_cast(image.info.guest_address); + Upload(in_buffer, addr, image_size); + scheduler.DeferOperation([=, this]() { FreeBuffer(in_buffer); }); + return {in_buffer.first, 0}; + }(); + + // Prepare output buffer + auto out_buffer = AllocBuffer(image_size, true); + scheduler.DeferOperation([=, this]() { FreeBuffer(out_buffer); }); auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl); const vk::DescriptorBufferInfo input_buffer_info{ - .buffer = staging.Handle(), - .offset = offset, - .range = image.info.guest_size_bytes, + .buffer = in_buffer, + .offset = in_offset, + .range = image_size, }; - ASSERT(image.view_for_detiler.has_value()); - const vk::DescriptorImageInfo output_image_info{ - .imageView = *image.view_for_detiler->image_view, - .imageLayout = image.layout, + const vk::DescriptorBufferInfo output_buffer_info{ + .buffer = out_buffer.first, + .offset = 0, + .range = image_size, }; std::vector set_writes{ @@ -357,20 +444,44 @@ bool TileManager::TryDetile(Image& image) { .dstBinding = 1, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageImage, - .pImageInfo = &output_image_info, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &output_buffer_info, }, }; cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *detiler->pl_layout, 0, set_writes); - cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, - sizeof(image.info.pitch), &image.info.pitch); + DetilerParams params; + params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); + params.num_levels = image.info.resources.levels; - cmdbuf.dispatch((image.info.size.width * image.info.size.height) / 64, 1, - 1); // round to 64 + ASSERT(image.info.resources.levels <= 14); + std::memset(¶ms.sizes, 0, sizeof(params.sizes)); + for (int m = 0; m < image.info.resources.levels; ++m) { + params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + (m > 0 ? params.sizes[m - 1] : 0); + } - return true; + auto pitch = image.info.pitch; + cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params), + ¶ms); + + ASSERT((image_size % 64) == 0); + const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); + const auto num_tiles = image_size / (64 * (bpp / 8)); + cmdbuf.dispatch(num_tiles, 1, 1); + + const vk::BufferMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .buffer = out_buffer.first, + .size = image_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + {}, post_barrier, {}); + + return {out_buffer.first}; } } // namespace VideoCore diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 98a337860..00765b1f8 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -4,7 +4,7 @@ #pragma once #include "common/types.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/buffer_cache/buffer.h" #include "video_core/texture_cache/image.h" namespace VideoCore { @@ -34,10 +34,16 @@ struct DetilerContext { class TileManager { public: + using ScratchBuffer = std::pair; + TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - bool TryDetile(Image& image); + std::optional TryDetile(Image& image); + + ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); + void Upload(ScratchBuffer buffer, const void* data, size_t size); + void FreeBuffer(ScratchBuffer buffer); private: const DetilerContext* GetDetiler(const Image& image) const; @@ -45,7 +51,7 @@ private: private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; - Vulkan::StreamBuffer staging; + StreamBuffer stream_buffer; std::array detilers; };