diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 878c10868..3b5690438 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,7 +89,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -143,7 +143,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -174,11 +174,6 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - name: Cache CMake Configuration uses: actions/cache@v4 env: @@ -201,7 +196,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -210,7 +205,7 @@ jobs: run: | mkdir upload mv ${{github.workspace}}/build/shadps4 upload - cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload + cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload tar cf shadps4-macos-sdl.tar.gz -C upload . - uses: actions/upload-artifact@v4 with: @@ -230,11 +225,8 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK and Setup Qt - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - uses: jurplel/install-qt-action@v4 + - name: Setup Qt + uses: jurplel/install-qt-action@v4 with: version: 6.7.3 host: mac @@ -265,7 +257,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -312,7 +304,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -368,7 +360,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) diff --git a/.gitmodules b/.gitmodules index 8010250a9..3d0d21c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,4 +106,16 @@ [submodule "externals/libpng"] path = externals/libpng url = https://github.com/pnggroup/libpng - shallow = true \ No newline at end of file + shallow = true +[submodule "externals/MoltenVK/SPIRV-Cross"] + path = externals/MoltenVK/SPIRV-Cross + url = https://github.com/KhronosGroup/SPIRV-Cross + shallow = true +[submodule "externals/MoltenVK/MoltenVK"] + path = externals/MoltenVK/MoltenVK + url = https://github.com/KhronosGroup/MoltenVK + shallow = true +[submodule "externals/MoltenVK/cereal"] + path = externals/MoltenVK/cereal + url = https://github.com/USCiLab/cereal + shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index aa84139ef..172733840 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -664,12 +664,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/constant_propagation_pass.cpp src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp + src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp + src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp @@ -683,6 +685,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/patch.cpp + src/shader_recompiler/ir/patch.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp @@ -877,7 +881,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") @@ -891,13 +895,17 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() if (APPLE) - option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF) - if (USE_SYSTEM_VULKAN_LOADER) - target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) + if (ENABLE_QT_GUI) + # Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers. + target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json) + set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json + PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d) + add_custom_command(TARGET shadps4 POST_BUILD + COMMAND cmake -E copy $ $/Contents/Frameworks/libMoltenVK.dylib) + set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() - # Link MoltenVK for Vulkan support - find_library(MOLTENVK MoltenVK REQUIRED) - target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) + # For non-bundled SDL build, just do a normal library link. + target_link_libraries(shadps4 PRIVATE MoltenVK) endif() if (ARCHITECTURE STREQUAL "x86_64") @@ -1018,4 +1026,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo") install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png") install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps") -endif() \ No newline at end of file +endif() diff --git a/README.md b/README.md index 18e69546c..7ef5bdf65 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o # Keyboard mapping +| Button | Function | +|-------------|-------------| +F10 | FPS Counter +Ctrl+F10 | Video Debug Info +F11 | Fullscreen +F12 | Trigger RenderDoc Capture + > [!NOTE] > Xbox and DualShock controllers work out of the box. diff --git a/REUSE.toml b/REUSE.toml index 747679c8b..cba63adf1 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -15,6 +15,7 @@ path = [ "documents/changelog.md", "documents/Quickstart/2.png", "documents/Screenshots/*", + "externals/MoltenVK/MoltenVK_icd.json", "scripts/ps4_names.txt", "src/images/about_icon.png", "src/images/controller_icon.png", diff --git a/documents/building-macos.md b/documents/building-macos.md index d8cc414e2..9a1a021ee 100644 --- a/documents/building-macos.md +++ b/documents/building-macos.md @@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv) brew install clang-format cmake ``` -Next, install x86_64 Homebrew and libraries. +Next, install x86_64 Qt. You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI. **If you are on an ARM Mac:** ``` # Installs x86_64 Homebrew to /usr/local arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" # Installs libraries. -arch -x86_64 /usr/local/bin/brew install molten-vk qt@6 +arch -x86_64 /usr/local/bin/brew install qt@6 ``` **If you are on an x86_64 Mac:** ``` -brew install molten-vk qt@6 +brew install qt@6 ``` -If you don't need the Qt GUI you can remove `qt@6` from the last command. - ### Cloning and compiling: Clone the repository recursively: diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 082be211a..dbe6794d8 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES SYSTEM ON ) +# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled +set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) + if (MSVC) # Silence "deprecation" warnings add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) @@ -107,7 +110,7 @@ if (NOT TARGET glslang::glslang) set(ENABLE_OPT OFF CACHE BOOL "") add_subdirectory(glslang) file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h") - target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") + target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") endif() # Robin-map @@ -174,15 +177,6 @@ if (NOT TARGET PNG::PNG) add_library(PNG::PNG ALIAS png_static) endif() -if (APPLE) - # date - if (NOT TARGET date::date-tz) - option(BUILD_TZ_LIB "" ON) - option(USE_SYSTEM_TZ_DB "" ON) - add_subdirectory(date) - endif() -endif() - # Dear ImGui add_library(Dear_ImGui dear_imgui/imgui.cpp @@ -199,7 +193,7 @@ option(TRACY_ENABLE "" ON) option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash option(TRACY_ON_DEMAND "" ON) option(TRACY_NO_FRAME_IMAGE "" ON) -option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling +option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability option(TRACY_NO_SYSTEM_TRACING "" ON) option(TRACY_NO_CALLSTACK "" ON) option(TRACY_NO_CODE_TRANSFER "" ON) @@ -229,3 +223,18 @@ if (NOT TARGET stb::headers) target_include_directories(stb INTERFACE stb) add_library(stb::headers ALIAS stb) endif() + +# Apple-only dependencies +if (APPLE) + # date + if (NOT TARGET date::date-tz) + option(BUILD_TZ_LIB "" ON) + option(USE_SYSTEM_TZ_DB "" ON) + add_subdirectory(date) + endif() + + # MoltenVK + if (NOT TARGET MoltenVK) + add_subdirectory(MoltenVK) + endif() +endif() diff --git a/externals/MoltenVK/CMakeLists.txt b/externals/MoltenVK/CMakeLists.txt new file mode 100644 index 000000000..908c2847c --- /dev/null +++ b/externals/MoltenVK/CMakeLists.txt @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +# Prepare MoltenVK Git revision +find_package(Git) +if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + OUTPUT_VARIABLE MVK_GIT_REV + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated) +file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";") +message(STATUS "MoltenVK revision: ${MVK_GIT_REV}") + +# Prepare MoltenVK version +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API) +string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}") +set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}") +message(STATUS "MoltenVK version: ${MVK_VERSION}") + +# Find required system libraries +find_library(APPKIT_LIBRARY AppKit REQUIRED) +find_library(FOUNDATION_LIBRARY Foundation REQUIRED) +find_library(IOKIT_LIBRARY IOKit REQUIRED) +find_library(IOSURFACE_LIBRARY IOSurface REQUIRED) +find_library(METAL_LIBRARY Metal REQUIRED) +find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED) + +# cereal +option(SKIP_PORTABILITY_TEST "" ON) +option(BUILD_DOC "" OFF) +option(BUILD_SANDBOX "" OFF) +option(SKIP_PERFORMANCE_COMPARISON "" ON) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(cereal) + +# SPIRV-Cross +option(SPIRV_CROSS_CLI "" OFF) +option(SPIRV_CROSS_ENABLE_TESTS "" OFF) +option(SPIRV_CROSS_ENABLE_HLSL "" OFF) +option(SPIRV_CROSS_ENABLE_CPP "" OFF) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(SPIRV-Cross) + +# Common +set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common) +file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS + ${MVK_COMMON_DIR}/*.cpp + ${MVK_COMMON_DIR}/*.m + ${MVK_COMMON_DIR}/*.mm) +set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR}) + +add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES}) +target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES}) +target_compile_options(MoltenVKCommon PRIVATE -w) + +# MoltenVKShaderConverter +set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter) +file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm) +set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include) + +add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES}) +target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES}) +target_compile_options(MoltenVKShaderConverter PRIVATE -w) +target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon) +target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1) + +# MoltenVK +set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK) +file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS + ${MVK_DIR}/MoltenVK/*.cpp + ${MVK_DIR}/MoltenVK/*.m + ${MVK_DIR}/MoltenVK/*.mm) +file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*) +set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include) + +add_library(MoltenVK SHARED ${MVK_SOURCES}) +target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES}) +target_compile_options(MoltenVK PRIVATE -w) +target_link_libraries(MoltenVK PRIVATE + ${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY} + Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter) +target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1) diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK new file mode 160000 index 000000000..5ad3ee5d2 --- /dev/null +++ b/externals/MoltenVK/MoltenVK @@ -0,0 +1 @@ +Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 diff --git a/externals/MoltenVK/MoltenVK_icd.json b/externals/MoltenVK/MoltenVK_icd.json new file mode 100644 index 000000000..2c3319263 --- /dev/null +++ b/externals/MoltenVK/MoltenVK_icd.json @@ -0,0 +1,8 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "../../../Frameworks/libMoltenVK.dylib", + "api_version": "1.2.0", + "is_portability_driver": true + } +} diff --git a/externals/MoltenVK/SPIRV-Cross b/externals/MoltenVK/SPIRV-Cross new file mode 160000 index 000000000..6173e24b3 --- /dev/null +++ b/externals/MoltenVK/SPIRV-Cross @@ -0,0 +1 @@ +Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6 diff --git a/externals/MoltenVK/cereal b/externals/MoltenVK/cereal new file mode 160000 index 000000000..d1fcec807 --- /dev/null +++ b/externals/MoltenVK/cereal @@ -0,0 +1 @@ +Subproject commit d1fcec807b372f04e4c1041b3058e11c12853e6e diff --git a/externals/sirit b/externals/sirit index 6cecb95d6..1e74f4ef8 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d +Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35 diff --git a/src/common/config.cpp b/src/common/config.cpp index 3db98a438..403b0e32f 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -422,6 +422,10 @@ void setEmulatorLanguage(std::string language) { emulator_language = language; } +void setGameInstallDirs(const std::vector& settings_install_dirs_config) { + settings_install_dirs = settings_install_dirs_config; +} + u32 getMainWindowGeometryX() { return main_window_geometry_x; } @@ -673,14 +677,6 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; - data["GUI"]["theme"] = mw_themes; - data["GUI"]["iconSize"] = m_icon_size; - data["GUI"]["sliderPos"] = m_slider_pos; - data["GUI"]["iconSizeGrid"] = m_icon_size_grid; - data["GUI"]["sliderPosGrid"] = m_slider_pos_grid; - data["GUI"]["gameTableMode"] = m_table_mode; - data["GUI"]["mw_width"] = m_window_size_W; - data["GUI"]["mw_height"] = m_window_size_H; std::vector install_dirs; for (const auto& dirString : settings_install_dirs) { @@ -690,6 +686,44 @@ void save(const std::filesystem::path& path) { data["GUI"]["addonInstallDir"] = std::string{fmt::UTF(settings_addon_install_dir.u8string()).data}; + data["GUI"]["emulatorLanguage"] = emulator_language; + data["Settings"]["consoleLanguage"] = m_language; + + std::ofstream file(path, std::ios::binary); + file << data; + file.close(); + saveMainWindow(path); +} + +void saveMainWindow(const std::filesystem::path& path) { + toml::value data; + + std::error_code error; + if (std::filesystem::exists(path, error)) { + try { + std::ifstream ifs; + ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); + ifs.open(path, std::ios_base::binary); + data = toml::parse(ifs, std::string{fmt::UTF(path.filename().u8string()).data}); + } catch (const std::exception& ex) { + fmt::print("Exception trying to parse config file. Exception: {}\n", ex.what()); + return; + } + } else { + if (error) { + fmt::print("Filesystem error: {}\n", error.message()); + } + fmt::print("Saving new configuration file {}\n", fmt::UTF(path.u8string())); + } + + data["GUI"]["mw_width"] = m_window_size_W; + data["GUI"]["mw_height"] = m_window_size_H; + data["GUI"]["theme"] = mw_themes; + data["GUI"]["iconSize"] = m_icon_size; + data["GUI"]["sliderPos"] = m_slider_pos; + data["GUI"]["iconSizeGrid"] = m_icon_size_grid; + data["GUI"]["sliderPosGrid"] = m_slider_pos_grid; + data["GUI"]["gameTableMode"] = m_table_mode; data["GUI"]["geometry_x"] = main_window_geometry_x; data["GUI"]["geometry_y"] = main_window_geometry_y; data["GUI"]["geometry_w"] = main_window_geometry_w; @@ -697,9 +731,6 @@ void save(const std::filesystem::path& path) { data["GUI"]["pkgDirs"] = m_pkg_viewer; data["GUI"]["elfDirs"] = m_elf_viewer; data["GUI"]["recentFiles"] = m_recent_files; - data["GUI"]["emulatorLanguage"] = emulator_language; - - data["Settings"]["consoleLanguage"] = m_language; std::ofstream file(path, std::ios::binary); file << data; diff --git a/src/common/config.h b/src/common/config.h index d98c94480..ff3b3703f 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -13,6 +13,7 @@ enum HideCursorState : s16 { Never, Idle, Always }; void load(const std::filesystem::path& path); void save(const std::filesystem::path& path); +void saveMainWindow(const std::filesystem::path& path); bool isNeoMode(); bool isFullscreenMode(); @@ -67,6 +68,7 @@ void setNeoMode(bool enable); void setUserName(const std::string& type); void setUpdateChannel(const std::string& type); void setSeparateUpdateEnabled(bool use); +void setGameInstallDirs(const std::vector& settings_install_dirs_config); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); @@ -128,4 +130,4 @@ void setDefaultValues(); // settings u32 GetLanguage(); -}; // namespace Config +}; // namespace Config \ No newline at end of file diff --git a/src/common/debug.h b/src/common/debug.h index 091c6191d..882e9e5c4 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() { return tracy::GetProfiler().IsConnected(); } +#define TRACY_GPU_ENABLED 0 + #define CUSTOM_LOCK(type, varname) \ tracy::LockableCtx varname { \ []() -> const tracy::SourceLocationData* { \ @@ -57,3 +59,11 @@ enum MarkersPalette : int { tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0}; #define FRAME_END FrameMark + +#ifdef TRACY_FIBERS +#define FIBER_ENTER(name) TracyFiberEnter(name) +#define FIBER_EXIT TracyFiberLeave +#else +#define FIBER_ENTER(name) +#define FIBER_EXIT +#endif diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 649624924..daf614bd9 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -142,45 +142,66 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) { frame.queues.push_back(std::move(dump)); } -void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute) { - std::scoped_lock lock{frame_dump_list_mutex}; +std::optional DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) { const auto it = waiting_reg_dumps.find(header_addr); if (it == waiting_reg_dumps.end()) { - return; + return std::nullopt; } auto& frame = *it->second; waiting_reg_dumps.erase(it); waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr)); - auto& dump = frame.regs[header_addr - base_addr]; - dump.regs = regs; - if (is_compute) { - dump.is_compute = true; - const auto& cs = dump.regs.cs_program; - dump.cs_data = PipelineComputerProgramDump{ - .cs_program = cs, - .code = std::vector{cs.Code().begin(), cs.Code().end()}, - }; - } else { - for (int i = 0; i < RegDump::MaxShaderStages; i++) { - if (regs.stage_enable.IsStageEnabled(i)) { - auto stage = regs.ProgramForStage(i); - if (stage->address_lo != 0) { - auto code = stage->Code(); - dump.stages[i] = PipelineShaderProgramDump{ - .user_data = *stage, - .code = std::vector{code.begin(), code.end()}, - }; - } + return &frame.regs[header_addr - base_addr]; +} + +void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, + const AmdGpu::Liverpool::Regs& regs) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->regs = regs; + + for (int i = 0; i < RegDump::MaxShaderStages; i++) { + if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { + auto stage = (*dump)->regs.ProgramForStage(i); + if (stage->address_lo != 0) { + auto code = stage->Code(); + (*dump)->stages[i] = PipelineShaderProgramDump{ + .user_data = *stage, + .code = std::vector{code.begin(), code.end()}, + }; } } } } -void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module, - std::span spv, std::span raw_code, - std::span patch_spv, bool is_patched) { - shader_dump_list.emplace_back(name, module, std::vector{spv.begin(), spv.end()}, +void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, + const CsState& cs_state) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->is_compute = true; + auto& cs = (*dump)->regs.cs_program; + cs = cs_state; + + (*dump)->cs_data = PipelineComputerProgramDump{ + .cs_program = cs, + .code = std::vector{cs.Code().begin(), cs.Code().end()}, + }; +} + +void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, + std::span raw_code, std::span patch_spv, + bool is_patched) { + shader_dump_list.emplace_back(name, l_stage, module, std::vector{spv.begin(), spv.end()}, std::vector{raw_code.begin(), raw_code.end()}, std::vector{patch_spv.begin(), patch_spv.end()}, is_patched); } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index fa2e5cd9d..a0e428b6b 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -11,7 +11,6 @@ #include #include "common/types.h" -#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #ifdef _WIN32 @@ -76,6 +75,7 @@ struct FrameDump { struct ShaderDump { std::string name; + Shader::LogicalStage l_stage; vk::ShaderModule module; std::vector spv; @@ -90,16 +90,17 @@ struct ShaderDump { std::string cache_isa_disasm{}; std::string cache_patch_disasm{}; - ShaderDump(std::string name, vk::ShaderModule module, std::vector spv, - std::vector isa, std::vector patch_spv, bool is_patched) - : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)), - patch_spv(std::move(patch_spv)), is_patched(is_patched) {} + ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module, + std::vector spv, std::vector isa, std::vector patch_spv, + bool is_patched) + : name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)), + isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {} ShaderDump(const ShaderDump& other) = delete; ShaderDump(ShaderDump&& other) noexcept - : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)}, - isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)}, - patch_source{std::move(other.patch_source)}, + : name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)}, + spv{std::move(other.spv)}, isa{std::move(other.isa)}, + patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)}, cache_spv_disasm{std::move(other.cache_spv_disasm)}, cache_isa_disasm{std::move(other.cache_isa_disasm)}, cache_patch_disasm{std::move(other.cache_patch_disasm)} {} @@ -108,6 +109,7 @@ struct ShaderDump { if (this == &other) return *this; name = std::move(other.name); + l_stage = other.l_stage; module = std::move(other.module); spv = std::move(other.spv); isa = std::move(other.isa); @@ -201,11 +203,17 @@ public: void PushQueueDump(QueueDump dump); void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); + const AmdGpu::Liverpool::Regs& regs); + using CsState = AmdGpu::Liverpool::ComputeProgram; + void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); - void CollectShader(const std::string& name, vk::ShaderModule module, std::span spv, + void CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); + +private: + std::optional GetRegDump(uintptr_t base_addr, uintptr_t header_addr); }; } // namespace DebugStateType diff --git a/src/core/devtools/widget/shader_list.cpp b/src/core/devtools/widget/shader_list.cpp index 80c939718..2c97db7fd 100644 --- a/src/core/devtools/widget/shader_list.cpp +++ b/src/core/devtools/widget/shader_list.cpp @@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) { DebugState.ShowDebugMessage(msg); } if (compile) { - static std::map stage_arg = { - {"vs", "vert"}, - {"gs", "geom"}, - {"fs", "frag"}, - {"cs", "comp"}, + static std::map stage_arg = { + {Shader::LogicalStage::Vertex, "vert"}, + {Shader::LogicalStage::TessellationControl, "tesc"}, + {Shader::LogicalStage::TessellationEval, "tese"}, + {Shader::LogicalStage::Geometry, "geom"}, + {Shader::LogicalStage::Fragment, "frag"}, + {Shader::LogicalStage::Compute, "comp"}, }; - auto stage = stage_arg.find(value.name.substr(0, 2)); + auto stage = stage_arg.find(value.l_stage); if (stage == stage_arg.end()) { - DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} + - value.name.substr(0, 2)); + DebugState.ShowDebugMessage(std::string{"Invalid shader stage"}); } else { std::string cmd = fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 " diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 0fdbb2783..45ba67b93 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -10,16 +10,28 @@ namespace Core::FileSys { +std::string RemoveTrailingSlashes(const std::string& path) { + // Remove trailing slashes to make comparisons simpler. + std::string path_sanitized = path; + while (path_sanitized.ends_with("/")) { + path_sanitized.pop_back(); + } + return path_sanitized; +} + void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder, bool read_only) { std::scoped_lock lock{m_mutex}; - m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only); } void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) { std::scoped_lock lock{m_mutex}; - auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), - [&](const MntPair& pair) { return pair.mount == guest_folder; }); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) { + return pair.mount == guest_folder_sanitized; + }); m_mnt_pairs.erase(it, m_mnt_pairs.end()); } @@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea } // Nothing to do if getting the mount itself. - if (corrected_path == mount->mount) { + const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path); + if (corrected_path_sanitized == mount->mount) { return mount->host_path; } @@ -186,4 +199,14 @@ void HandleTable::CreateStdHandles() { setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr } +int HandleTable::GetFileDescriptor(File* file) { + std::scoped_lock lock{m_mutex}; + auto it = std::find(m_files.begin(), m_files.end(), file); + + if (it != m_files.end()) { + return std::distance(m_files.begin(), it); + } + return 0; +} + } // namespace Core::FileSys diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index b0153c162..56df32ad0 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -9,6 +9,7 @@ #include #include #include "common/io_file.h" +#include "common/logging/formatter.h" #include "core/devices/base_device.h" namespace Core::FileSys { @@ -22,7 +23,7 @@ class MntPoints { public: struct MntPair { std::filesystem::path host_path; - std::string mount; // e.g /app0/ + std::string mount; // e.g /app0 bool read_only; }; @@ -37,10 +38,21 @@ public: std::filesystem::path GetHostPath(std::string_view guest_directory, bool* is_read_only = nullptr); + const MntPair* GetMountFromHostPath(const std::string& host_path) { + std::scoped_lock lock{m_mutex}; + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const MntPair& mount) { + return host_path.starts_with(std::string{fmt::UTF(mount.host_path.u8string()).data}); + }); + return it == m_mnt_pairs.end() ? nullptr : &*it; + } + const MntPair* GetMount(const std::string& guest_path) { std::scoped_lock lock{m_mutex}; - const auto it = std::ranges::find_if( - m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); }); + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) { + // When doing starts-with check, add a trailing slash to make sure we don't match + // against only part of the mount path. + return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/"); + }); return it == m_mnt_pairs.end() ? nullptr : &*it; } @@ -83,6 +95,7 @@ public: void DeleteHandle(int d); File* GetFile(int d); File* GetFile(const std::filesystem::path& host_name); + int GetFileDescriptor(File* file); void CreateStdHandles(); diff --git a/src/core/libraries/audio3d/audio3d.cpp b/src/core/libraries/audio3d/audio3d.cpp index 44670d87b..d896524c6 100644 --- a/src/core/libraries/audio3d/audio3d.cpp +++ b/src/core/libraries/audio3d/audio3d.cpp @@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel, u32* pQueueAvailable) { - LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId); + LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 18035e6ce..1a6007bf8 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61); // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; std::condition_variable cv_lock{}; -static std::mutex m_submission{}; +std::mutex m_submission{}; static u64 frames_submitted{}; // frame counter static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame static int sdk_version{0}; -struct AscQueueInfo { - VAddr map_addr; - u32* read_addr; - u32 ring_size_dw; -}; -static Common::SlotVector asc_queues{}; +static u32 asc_next_offs_dw[Liverpool::NumComputeRings]; static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF; static constexpr u32 tessellation_offchip_buffer_size = 0x800000u; @@ -493,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() { } void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw); if (gnm_vqid == 0) { @@ -506,11 +502,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { } auto vqid = gnm_vqid - 1; - auto& asc_queue = asc_queues[{vqid}]; - const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr + *asc_queue.read_addr); - const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr - : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr; - const std::span acb_span{acb_ptr, acb_size >> 2u}; + auto& asc_queue = liverpool->asc_queues[{vqid}]; + + const auto& offs_dw = asc_next_offs_dw[vqid]; + + if (next_offs_dw < offs_dw) { + ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer"); + } + + const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr) + offs_dw; + const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw; + const std::span acb_span{acb_ptr, acb_size_dw}; + + asc_next_offs_dw[vqid] = next_offs_dw; if (DebugState.DumpingCurrentFrame()) { static auto last_frame_num = -1LL; @@ -545,9 +549,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { }); } liverpool->SubmitAsc(gnm_vqid, acb_span); - - *asc_queue.read_addr += acb_size; - *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; } void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) { @@ -971,7 +972,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() { } void PS4_SYSV_ABI sceGnmFlushGarlic() { - LOG_WARNING(Lib_GnmDriver, "(STUBBED) called"); + LOG_TRACE(Lib_GnmDriver, "(STUBBED) called"); } int PS4_SYSV_ABI sceGnmGetCoredumpAddress() { @@ -1266,12 +1267,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR; } - auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw); + const auto vqid = + liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id); // We need to offset index as `dingDong` assumes it to be from the range [1..64] const auto gnm_vqid = vqid.index + 1; LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id, gnm_vqid); + const auto& queue = liverpool->asc_queues[vqid]; + *queue.read_addr = 0u; + return gnm_vqid; } @@ -1642,7 +1647,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) { LOG_TRACE(Lib_GnmDriver, "called"); - if (!cmdbuf || size < 0x1E) { return -1; } @@ -1660,11 +1664,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], - hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, + hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL + hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG + // right padding? WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -2161,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { @@ -2253,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ } int PS4_SYSV_ABI sceGnmSubmitDone() { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); WaitGpuIdle(); if (!liverpool->IsGpuIdle()) { diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 5ba9976c6..57efbb631 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { return sizeof(OrbisKernelDirent); } +static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) { + int dir_entries = 0; + + auto* h = Common::Singleton::Instance(); + auto* mnt = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data}; + auto mount = mnt->GetMountFromHostPath(update_dir_name); + auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data}; + + size_t pos = update_dir_name.find("-UPDATE"); + if (pos != std::string::npos) { + update_dir_name.erase(pos, 7); + auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1); + int descriptor; + + auto existent_folder = h->GetFile(update_dir_name); + if (!existent_folder) { + u32 handle = h->CreateHandle(); + auto* new_file = h->GetFile(handle); + new_file->type = Core::FileSys::FileType::Directory; + new_file->m_guest_name = guest_name; + new_file->m_host_name = update_dir_name; + if (!std::filesystem::is_directory(new_file->m_host_name)) { + h->DeleteHandle(handle); + return dir_entries; + } else { + new_file->dirents = GetDirectoryEntries(new_file->m_host_name); + new_file->dirents_index = 0; + } + new_file->is_opened = true; + descriptor = h->GetFileDescriptor(new_file); + } else { + descriptor = h->GetFileDescriptor(existent_folder); + } + + dir_entries = GetDents(descriptor, buf, nbytes, basep); + if (dir_entries == ORBIS_OK && existent_folder) { + existent_folder->dirents_index = 0; + file->dirents_index = 0; + } + } + + return dir_entries; +} + int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) { - return GetDents(fd, buf, nbytes, nullptr); + int a = GetDents(fd, buf, nbytes, nullptr); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr); + } + return a; } int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) { - return GetDents(fd, buf, nbytes, basep); + int a = GetDents(fd, buf, nbytes, basep); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, basep); + } + return a; } s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 6c29d9305..97cc01ebc 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg return handle; } handle = linker->LoadModule(path, true); + if (handle == -1) { + return ORBIS_KERNEL_ERROR_ESRCH; + } auto* module = linker->GetModule(handle); linker->RelocateAnyImports(module); diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 08886c6eb..610e61238 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -327,7 +327,8 @@ void PS4_SYSV_ABI sched_yield() { std::this_thread::yield(); } -int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) { +int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, + void PS4_SYSV_ABI (*init_routine)()) { for (;;) { auto state = once_control->state.load(); if (state == PthreadOnceState::Done) { diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index eb6046c7a..8453a78b9 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/assert.h" #include "common/logging/log.h" @@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou return std::strncpy(dest, src, count); } +int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) { +#ifdef _WIN64 + return strncpy_s(dest, destsz, src, count); +#else + std::strcpy(dest, src); + return 0; +#endif +} + char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) { return std::strcat(dest, src); } @@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { internal_strlen); LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncpy); + LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strncpy_s); LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strcat); LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, diff --git a/src/core/libraries/np_trophy/trophy_ui.cpp b/src/core/libraries/np_trophy/trophy_ui.cpp index 618f8db46..55ef7b8de 100644 --- a/src/core/libraries/np_trophy/trophy_ui.cpp +++ b/src/core/libraries/np_trophy/trophy_ui.cpp @@ -38,21 +38,22 @@ void TrophyUI::Finish() { void TrophyUI::Draw() { const auto& io = GetIO(); + float AdjustWidth = io.DisplaySize.x / 1280; + float AdjustHeight = io.DisplaySize.y / 720; const ImVec2 window_size{ - std::min(io.DisplaySize.x, 250.f), - std::min(io.DisplaySize.y, 70.f), + std::min(io.DisplaySize.x, (300 * AdjustWidth)), + std::min(io.DisplaySize.y, (70 * AdjustHeight)), }; SetNextWindowSize(window_size); SetNextWindowCollapsed(false); - SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50)); + SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight))); KeepNavHighlight(); - if (Begin("Trophy Window", nullptr, ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoInputs)) { if (trophy_icon) { - Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50)); + Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight))); ImGui::SameLine(); } else { // placeholder @@ -61,6 +62,7 @@ void TrophyUI::Draw() { GetColorU32(ImVec4{0.7f})); ImGui::Indent(60); } + SetWindowFontScale((1.2 * AdjustHeight)); TextWrapped("Trophy earned!\n%s", trophy_name.c_str()); } End(); diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index ec4186f11..98f086dd9 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -155,6 +155,9 @@ int PS4_SYSV_ABI scePadGetFeatureReport() { } int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) { + if (userId == -1) { + return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE; + } LOG_DEBUG(Lib_Pad, "(DUMMY) called"); return 1; } @@ -246,6 +249,9 @@ int PS4_SYSV_ABI scePadMbusTerm() { int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) { LOG_INFO(Lib_Pad, "(DUMMY) called user_id = {} type = {} index = {}", userId, type, index); + if (userId == -1) { + return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE; + } if (Config::getUseSpecialPad()) { if (type != ORBIS_PAD_PORT_TYPE_SPECIAL) return ORBIS_PAD_ERROR_DEVICE_NOT_CONNECTED; @@ -346,6 +352,9 @@ int PS4_SYSV_ABI scePadReadHistory() { } int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { + if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) { + return ORBIS_PAD_ERROR_INVALID_HANDLE; + } auto* controller = Common::Singleton::Instance(); int connectedCount = 0; bool isConnected = false; diff --git a/src/core/libraries/playgo/playgo.cpp b/src/core/libraries/playgo/playgo.cpp index 13d6f991f..848533ff7 100644 --- a/src/core/libraries/playgo/playgo.cpp +++ b/src/core/libraries/playgo/playgo.cpp @@ -137,8 +137,8 @@ s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle, s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds, uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) { - LOG_INFO(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, - *chunkIds, numberOfEntries); + LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, + *chunkIds, numberOfEntries); if (handle != PlaygoHandle) { return ORBIS_PLAYGO_ERROR_BAD_HANDLE; diff --git a/src/emulator.cpp b/src/emulator.cpp index 60d6e18d7..252a34418 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/config.h" @@ -100,15 +101,17 @@ Emulator::Emulator() { Emulator::~Emulator() { const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); } void Emulator::Run(const std::filesystem::path& file) { // Use the eboot from the separated updates folder if it's there - std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE"); - bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys"); - std::filesystem::path eboot_path = use_game_patch ? game_patch_folder / file.filename() : file; + std::filesystem::path game_patch_folder = file.parent_path(); + game_patch_folder += "-UPDATE"; + std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename()) + ? game_patch_folder / file.filename() + : file; // Applications expect to be run from /app0 so mount the file's parent path as app0. auto* mnt = Common::Singleton::Instance(); @@ -226,20 +229,37 @@ void Emulator::Run(const std::filesystem::path& file) { LoadSystemModules(eboot_path, game_info.game_serial); // Load all prx from game's sce_module folder - std::filesystem::path sce_module_folder = file.parent_path() / "sce_module"; - if (std::filesystem::is_directory(sce_module_folder)) { - for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) { - std::filesystem::path module_path = entry.path(); - std::filesystem::path update_module_path = - eboot_path.parent_path() / "sce_module" / entry.path().filename(); - if (std::filesystem::exists(update_module_path) && use_game_patch) { - module_path = update_module_path; + std::vector modules_to_load; + std::filesystem::path game_module_folder = file.parent_path() / "sce_module"; + if (std::filesystem::is_directory(game_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) { + if (entry.is_regular_file()) { + modules_to_load.push_back(entry.path()); } - LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); - linker->LoadModule(module_path); } } + // Load all prx from separate update's sce_module folder + std::filesystem::path update_module_folder = game_patch_folder / "sce_module"; + if (std::filesystem::is_directory(update_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) { + auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(), + [&entry](const std::filesystem::path& p) { + return p.filename() == entry.path().filename(); + }); + if (it != modules_to_load.end()) { + *it = entry.path(); + } else { + modules_to_load.push_back(entry.path()); + } + } + } + + for (const auto& module_path : modules_to_load) { + LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); + linker->LoadModule(module_path); + } + #ifdef ENABLE_DISCORD_RPC // Discord RPC if (Config::getEnableDiscordRPC()) { @@ -266,7 +286,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, @@ -276,7 +296,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc}, - {"libSceCesCs.sprx", nullptr}}}; + {"libSceCesCs.sprx", nullptr}, + {"libSceFont.sprx", nullptr}, + {"libSceFontFt.sprx", nullptr}, + {"libSceFreeTypeOt.sprx", nullptr}}}; std::vector found_modules; const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir); diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 32072b30b..d43c35ef4 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -139,7 +139,7 @@ void GameListFrame::PopulateGameList() { formattedPlayTime = formattedPlayTime.trimmed(); m_game_info->m_games[i].play_time = playTime.toStdString(); if (formattedPlayTime.isEmpty()) { - SetTableItem(i, 8, "0"); + SetTableItem(i, 8, QString("%1s").arg(seconds)); } else { SetTableItem(i, 8, formattedPlayTime); } diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 6eef1230c..3cc12c11e 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -122,11 +122,11 @@ public: if (selected == &openSfoViewer) { PSF psf; - QString game_update_path; - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); std::filesystem::path game_folder_path = m_games[itemID].path; - if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { - game_folder_path = Common::FS::PathFromQString(game_update_path); + std::filesystem::path game_update_path = game_folder_path; + game_update_path += "UPDATE"; + if (std::filesystem::exists(game_update_path)) { + game_folder_path = game_update_path; } if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) { int rows = psf.GetEntries().size(); @@ -320,21 +320,17 @@ public: bool error = false; QString folder_path, game_update_path, dlc_path; Common::FS::PathToQString(folder_path, m_games[itemID].path); - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); + game_update_path = folder_path + "-UPDATE"; Common::FS::PathToQString( dlc_path, Config::getAddonInstallDir() / Common::FS::PathFromQString(folder_path).parent_path().filename()); QString message_type = tr("Game"); if (selected == deleteUpdate) { - if (!Config::getSeparateUpdateEnabled()) { - QMessageBox::critical(nullptr, tr("Error"), - QString(tr("requiresEnableSeparateUpdateFolder_MSG"))); - error = true; - } else if (!std::filesystem::exists( - Common::FS::PathFromQString(game_update_path))) { - QMessageBox::critical(nullptr, tr("Error"), - QString(tr("This game has no update to delete!"))); + if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { + QMessageBox::critical( + nullptr, tr("Error"), + QString(tr("This game has no separate update to delete!"))); error = true; } else { folder_path = game_update_path; diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index b00e14e8b..90cc947f4 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -35,7 +35,7 @@ MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWi MainWindow::~MainWindow() { SaveWindowState(); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); } bool MainWindow::Init() { @@ -111,6 +111,7 @@ void MainWindow::CreateActions() { m_theme_act_group->addAction(ui->setThemeGreen); m_theme_act_group->addAction(ui->setThemeBlue); m_theme_act_group->addAction(ui->setThemeViolet); + m_theme_act_group->addAction(ui->setThemeGruvbox); } void MainWindow::AddUiWidgets() { @@ -542,6 +543,14 @@ void MainWindow::CreateConnects() { isIconBlack = false; } }); + connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() { + m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar); + Config::setMainWindowTheme(static_cast(Theme::Gruvbox)); + if (isIconBlack) { + SetUiIcons(false); + isIconBlack = false; + } + }); } void MainWindow::StartGame() { @@ -915,6 +924,11 @@ void MainWindow::SetLastUsedTheme() { isIconBlack = false; SetUiIcons(false); break; + case Theme::Gruvbox: + ui->setThemeGruvbox->setChecked(true); + isIconBlack = false; + SetUiIcons(false); + break; } } @@ -1008,7 +1022,7 @@ void MainWindow::AddRecentFiles(QString filePath) { } Config::setRecentFiles(vec); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::save(config_dir / "config.toml"); + Config::saveMainWindow(config_dir / "config.toml"); CreateRecentGameActions(); // Refresh the QActions. } @@ -1079,4 +1093,4 @@ bool MainWindow::eventFilter(QObject* obj, QEvent* event) { } } return QMainWindow::eventFilter(obj, event); -} +} \ No newline at end of file diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 35e64ef74..a52b4466e 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -8,14 +8,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { switch (theme) { case Theme::Dark: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(50, 50, 50)); themePalette.setColor(QPalette::WindowText, Qt::white); themePalette.setColor(QPalette::Base, QColor(20, 20, 20)); - themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25)); themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53)); themePalette.setColor(QPalette::ToolTipBase, Qt::white); themePalette.setColor(QPalette::ToolTipText, Qt::white); @@ -28,12 +29,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; - case Theme::Light: - mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background - "color: #000000;" // Black text - "border: 2px solid #000000;" // Black border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #ffffff; color: #000000; border: 1px solid #000000; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray themePalette.setColor(QPalette::WindowText, Qt::black); // Black themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish @@ -48,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::white); // White qApp->setPalette(themePalette); break; - case Theme::Green: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #192819; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base @@ -68,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text - qApp->setPalette(themePalette); break; - case Theme::Blue: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base @@ -94,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; - case Theme::Violet: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base @@ -115,6 +119,28 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text + qApp->setPalette(themePalette); + break; + case Theme::Gruvbox: + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #83A598; }"); + themePalette.setColor(QPalette::Window, QColor(29, 32, 33)); + themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Base, QColor(29, 32, 33)); + themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47)); + themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215)); + themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Text, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Button, QColor(40, 40, 40)); + themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52)); + themePalette.setColor(QPalette::Link, QColor(131, 165, 152)); + themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152)); + themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; } diff --git a/src/qt_gui/main_window_themes.h b/src/qt_gui/main_window_themes.h index 6da70e995..d162da87b 100644 --- a/src/qt_gui/main_window_themes.h +++ b/src/qt_gui/main_window_themes.h @@ -7,13 +7,7 @@ #include #include -enum class Theme : int { - Dark, - Light, - Green, - Blue, - Violet, -}; +enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox }; class WindowThemes : public QObject { Q_OBJECT diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 5ff572f86..df64361fd 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -36,6 +36,7 @@ public: QAction* setThemeGreen; QAction* setThemeBlue; QAction* setThemeViolet; + QAction* setThemeGruvbox; QWidget* centralWidget; QLineEdit* mw_searchbar; QPushButton* playButton; @@ -158,6 +159,9 @@ public: setThemeViolet = new QAction(MainWindow); setThemeViolet->setObjectName("setThemeViolet"); setThemeViolet->setCheckable(true); + setThemeGruvbox = new QAction(MainWindow); + setThemeGruvbox->setObjectName("setThemeGruvbox"); + setThemeGruvbox->setCheckable(true); centralWidget = new QWidget(MainWindow); centralWidget->setObjectName("centralWidget"); sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth()); @@ -282,6 +286,7 @@ public: menuThemes->addAction(setThemeGreen); menuThemes->addAction(setThemeBlue); menuThemes->addAction(setThemeViolet); + menuThemes->addAction(setThemeGruvbox); menuGame_List_Icons->addAction(setIconSizeTinyAct); menuGame_List_Icons->addAction(setIconSizeSmallAct); menuGame_List_Icons->addAction(setIconSizeMediumAct); @@ -368,6 +373,7 @@ public: setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr)); setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr)); setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr)); + setThemeGruvbox->setText("Gruvbox"); toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr)); } // retranslateUi }; diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 1fd4b6e8b..09d3674f7 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -12,12 +12,13 @@ #ifdef ENABLE_UPDATER #include "check_update.h" #endif +#include #include "common/logging/backend.h" #include "common/logging/filter.h" +#include "common/logging/formatter.h" #include "main_window.h" #include "settings_dialog.h" #include "ui_settings_dialog.h" - QStringList languageNames = {"Arabic", "Czech", "Danish", @@ -94,13 +95,18 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge connect(ui->buttonBox, &QDialogButtonBox::clicked, this, [this, config_dir](QAbstractButton* button) { if (button == ui->buttonBox->button(QDialogButtonBox::Save)) { + UpdateSettings(); Config::save(config_dir / "config.toml"); QWidget::close(); } else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) { + UpdateSettings(); Config::save(config_dir / "config.toml"); } else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) { Config::setDefaultValues(); + Config::save(config_dir / "config.toml"); LoadValuesFromConfig(); + } else if (button == ui->buttonBox->button(QDialogButtonBox::Close)) { + ResetInstallFolders(); } if (Common::Log::IsActive()) { Common::Log::Filter filter; @@ -119,35 +125,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge // GENERAL TAB { - connect(ui->userNameLineEdit, &QLineEdit::textChanged, this, - [](const QString& text) { Config::setUserName(text.toStdString()); }); - - connect(ui->consoleLanguageComboBox, QOverload::of(&QComboBox::currentIndexChanged), - this, [](int index) { - if (index >= 0 && index < languageIndexes.size()) { - int languageCode = languageIndexes[index]; - Config::setLanguage(languageCode); - } - }); - - connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setFullscreenMode(val); }); - - connect(ui->separateUpdatesCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setSeparateUpdateEnabled(val); }); - - connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setShowSplash(val); }); - - connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setNeoMode(val); }); - - connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, - [](const QString& text) { Config::setLogType(text.toStdString()); }); - - connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, - [](const QString& text) { Config::setLogFilter(text.toStdString()); }); - #ifdef ENABLE_UPDATER connect(ui->updateCheckBox, &QCheckBox::stateChanged, this, [](int state) { Config::setAutoUpdate(state == Qt::Checked); }); @@ -163,74 +140,12 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge ui->updaterGroupBox->setVisible(false); ui->GUIgroupBox->setMaximumSize(265, 16777215); #endif - - connect(ui->playBGMCheckBox, &QCheckBox::stateChanged, this, [](int val) { - Config::setPlayBGM(val); - if (val == Qt::Unchecked) { - BackgroundMusicPlayer::getInstance().stopMusic(); - } - }); - - connect(ui->BGMVolumeSlider, &QSlider::valueChanged, this, [](float val) { - Config::setBGMvolume(val); - BackgroundMusicPlayer::getInstance().setVolume(val); - }); - -#ifdef ENABLE_DISCORD_RPC - connect(ui->discordRPCCheckbox, &QCheckBox::stateChanged, this, [](int val) { - Config::setEnableDiscordRPC(val); - auto* rpc = Common::Singleton::Instance(); - if (val == Qt::Checked) { - rpc->init(); - rpc->setStatusIdling(); - } else { - rpc->shutdown(); - } - }); -#endif } // Input TAB { connect(ui->hideCursorComboBox, QOverload::of(&QComboBox::currentIndexChanged), this, - [this](s16 index) { - Config::setCursorState(index); - OnCursorStateChanged(index); - }); - - connect(ui->idleTimeoutSpinBox, &QSpinBox::valueChanged, this, - [](int index) { Config::setCursorHideTimeout(index); }); - - connect(ui->backButtonBehaviorComboBox, QOverload::of(&QComboBox::currentIndexChanged), - this, [this](int index) { - if (index >= 0 && index < ui->backButtonBehaviorComboBox->count()) { - QString data = ui->backButtonBehaviorComboBox->itemData(index).toString(); - Config::setBackButtonBehavior(data.toStdString()); - } - }); - } - - // GPU TAB - { - // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 - // when setting and add 1 when getting to select the correct gpu in Qt - connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this, - [](int index) { Config::setGpuId(index - 1); }); - - connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setScreenWidth(val); }); - - connect(ui->heightSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setScreenHeight(val); }); - - connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this, - [](int val) { Config::setVblankDiv(val); }); - - connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setDumpShaders(val); }); - - connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setNullGpu(val); }); + [this](s16 index) { OnCursorStateChanged(index); }); } // PATH TAB @@ -262,21 +177,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge }); } - // DEBUG TAB - { - connect(ui->debugDump, &QCheckBox::stateChanged, this, - [](int val) { Config::setDebugDump(val); }); - - connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setVkValidation(val); }); - - connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setVkSyncValidation(val); }); - - connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this, - [](int val) { Config::setRdocEnabled(val); }); - } - // Descriptions { // General @@ -323,40 +223,69 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge } void SettingsDialog::LoadValuesFromConfig() { - ui->consoleLanguageComboBox->setCurrentIndex( - std::distance( - languageIndexes.begin(), - std::find(languageIndexes.begin(), languageIndexes.end(), Config::GetLanguage())) % - languageIndexes.size()); - ui->emulatorLanguageComboBox->setCurrentIndex(languages[Config::getEmulatorLanguage()]); - ui->hideCursorComboBox->setCurrentIndex(Config::getCursorState()); - OnCursorStateChanged(Config::getCursorState()); - ui->idleTimeoutSpinBox->setValue(Config::getCursorHideTimeout()); - ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1); - ui->widthSpinBox->setValue(Config::getScreenWidth()); - ui->heightSpinBox->setValue(Config::getScreenHeight()); - ui->vblankSpinBox->setValue(Config::vblankDiv()); - ui->dumpShadersCheckBox->setChecked(Config::dumpShaders()); - ui->nullGpuCheckBox->setChecked(Config::nullGpu()); - ui->playBGMCheckBox->setChecked(Config::getPlayBGM()); - ui->BGMVolumeSlider->setValue((Config::getBGMvolume())); - ui->discordRPCCheckbox->setChecked(Config::getEnableDiscordRPC()); - ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode()); - ui->separateUpdatesCheckBox->setChecked(Config::getSeparateUpdateEnabled()); - ui->showSplashCheckBox->setChecked(Config::showSplash()); - ui->ps4proCheckBox->setChecked(Config::isNeoMode()); - ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType())); - ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter())); - ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName())); - ui->debugDump->setChecked(Config::debugDump()); - ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled()); - ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled()); - ui->rdocCheckBox->setChecked(Config::isRdocEnabled()); + std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + std::error_code error; + if (!std::filesystem::exists(userdir / "config.toml", error)) { + Config::load(userdir / "config.toml"); + return; + } + + try { + std::ifstream ifs; + ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); + const toml::value data = toml::parse(userdir / "config.toml"); + } catch (std::exception& ex) { + fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what()); + return; + } + + const toml::value data = toml::parse(userdir / "config.toml"); + const QVector languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, 9, + 15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 30, 28}; + + ui->consoleLanguageComboBox->setCurrentIndex( + std::distance(languageIndexes.begin(), + std::find(languageIndexes.begin(), languageIndexes.end(), + toml::find_or(data, "Settings", "consoleLanguage", 6))) % + languageIndexes.size()); + ui->emulatorLanguageComboBox->setCurrentIndex( + languages[toml::find_or(data, "GUI", "emulatorLanguage", "en")]); + ui->hideCursorComboBox->setCurrentIndex(toml::find_or(data, "Input", "cursorState", 1)); + OnCursorStateChanged(toml::find_or(data, "Input", "cursorState", 1)); + ui->idleTimeoutSpinBox->setValue(toml::find_or(data, "Input", "cursorHideTimeout", 5)); + // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 + // when setting and add 1 when getting to select the correct gpu in Qt + ui->graphicsAdapterBox->setCurrentIndex(toml::find_or(data, "Vulkan", "gpuId", -1) + 1); + ui->widthSpinBox->setValue(toml::find_or(data, "GPU", "screenWidth", 1280)); + ui->heightSpinBox->setValue(toml::find_or(data, "GPU", "screenHeight", 720)); + ui->vblankSpinBox->setValue(toml::find_or(data, "GPU", "vblankDivider", 1)); + ui->dumpShadersCheckBox->setChecked(toml::find_or(data, "GPU", "dumpShaders", false)); + ui->nullGpuCheckBox->setChecked(toml::find_or(data, "GPU", "nullGpu", false)); + ui->playBGMCheckBox->setChecked(toml::find_or(data, "General", "playBGM", false)); + ui->BGMVolumeSlider->setValue(toml::find_or(data, "General", "BGMvolume", 50)); + ui->discordRPCCheckbox->setChecked( + toml::find_or(data, "General", "enableDiscordRPC", true)); + ui->fullscreenCheckBox->setChecked(toml::find_or(data, "General", "Fullscreen", false)); + ui->separateUpdatesCheckBox->setChecked( + toml::find_or(data, "General", "separateUpdateEnabled", false)); + ui->showSplashCheckBox->setChecked(toml::find_or(data, "General", "showSplash", false)); + ui->ps4proCheckBox->setChecked(toml::find_or(data, "General", "isPS4Pro", false)); + ui->logTypeComboBox->setCurrentText( + QString::fromStdString(toml::find_or(data, "General", "logType", "async"))); + ui->logFilterLineEdit->setText( + QString::fromStdString(toml::find_or(data, "General", "logFilter", ""))); + ui->userNameLineEdit->setText( + QString::fromStdString(toml::find_or(data, "General", "userName", "shadPS4"))); + ui->debugDump->setChecked(toml::find_or(data, "Debug", "DebugDump", false)); + ui->vkValidationCheckBox->setChecked(toml::find_or(data, "Vulkan", "validation", false)); + ui->vkSyncValidationCheckBox->setChecked( + toml::find_or(data, "Vulkan", "validation_sync", false)); + ui->rdocCheckBox->setChecked(toml::find_or(data, "Vulkan", "rdocEnable", false)); #ifdef ENABLE_UPDATER - ui->updateCheckBox->setChecked(Config::autoUpdate()); - std::string updateChannel = Config::getUpdateChannel(); + ui->updateCheckBox->setChecked(toml::find_or(data, "General", "autoUpdate", false)); + std::string updateChannel = toml::find_or(data, "General", "updateChannel", ""); if (updateChannel != "Release" && updateChannel != "Nightly") { if (Common::isRelease) { updateChannel = "Release"; @@ -367,18 +296,13 @@ void SettingsDialog::LoadValuesFromConfig() { ui->updateComboBox->setCurrentText(QString::fromStdString(updateChannel)); #endif - for (const auto& dir : Config::getGameInstallDirs()) { - QString path_string; - Common::FS::PathToQString(path_string, dir); - QListWidgetItem* item = new QListWidgetItem(path_string); - ui->gameFoldersListWidget->addItem(item); - } - - QString backButtonBehavior = QString::fromStdString(Config::getBackButtonBehavior()); + QString backButtonBehavior = QString::fromStdString( + toml::find_or(data, "Input", "backButtonBehavior", "left")); int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior); ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0); ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty()); + ResetInstallFolders(); } void SettingsDialog::InitializeEmulatorLanguages() { @@ -554,3 +478,75 @@ bool SettingsDialog::eventFilter(QObject* obj, QEvent* event) { } return QDialog::eventFilter(obj, event); } + +void SettingsDialog::UpdateSettings() { + + const QVector TouchPadIndex = {"left", "center", "right", "none"}; + Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); + Config::setNeoMode(ui->ps4proCheckBox->isChecked()); + Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked()); + Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); + Config::setNeoMode(ui->ps4proCheckBox->isChecked()); + Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); + Config::setLogFilter(ui->logFilterLineEdit->text().toStdString()); + Config::setUserName(ui->userNameLineEdit->text().toStdString()); + Config::setCursorState(ui->hideCursorComboBox->currentIndex()); + Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value()); + Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1); + Config::setBGMvolume(ui->BGMVolumeSlider->value()); + Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]); + Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked()); + Config::setScreenWidth(ui->widthSpinBox->value()); + Config::setScreenHeight(ui->heightSpinBox->value()); + Config::setVblankDiv(ui->vblankSpinBox->value()); + Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked()); + Config::setNullGpu(ui->nullGpuCheckBox->isChecked()); + Config::setSeparateUpdateEnabled(ui->separateUpdatesCheckBox->isChecked()); + Config::setShowSplash(ui->showSplashCheckBox->isChecked()); + Config::setDebugDump(ui->debugDump->isChecked()); + Config::setVkValidation(ui->vkValidationCheckBox->isChecked()); + Config::setVkSyncValidation(ui->vkSyncValidationCheckBox->isChecked()); + Config::setRdocEnabled(ui->rdocCheckBox->isChecked()); + Config::setAutoUpdate(ui->updateCheckBox->isChecked()); + Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString()); + +#ifdef ENABLE_DISCORD_RPC + auto* rpc = Common::Singleton::Instance(); + if (Config::getEnableDiscordRPC()) { + rpc->init(); + rpc->setStatusIdling(); + } else { + rpc->shutdown(); + } +#endif + + BackgroundMusicPlayer::getInstance().setVolume(ui->BGMVolumeSlider->value()); +} + +void SettingsDialog::ResetInstallFolders() { + + std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + const toml::value data = toml::parse(userdir / "config.toml"); + + if (data.contains("GUI")) { + const toml::value& gui = data.at("GUI"); + const auto install_dir_array = + toml::find_or>(gui, "installDirs", {}); + std::vector settings_install_dirs_config = {}; + + for (const auto& dir : install_dir_array) { + if (std::find(settings_install_dirs_config.begin(), settings_install_dirs_config.end(), + dir) == settings_install_dirs_config.end()) { + settings_install_dirs_config.push_back(dir); + } + } + + for (const auto& dir : settings_install_dirs_config) { + QString path_string; + Common::FS::PathToQString(path_string, dir); + QListWidgetItem* item = new QListWidgetItem(path_string); + ui->gameFoldersListWidget->addItem(item); + } + Config::setGameInstallDirs(settings_install_dirs_config); + } +} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h index 8cdded980..987b35d45 100644 --- a/src/qt_gui/settings_dialog.h +++ b/src/qt_gui/settings_dialog.h @@ -31,6 +31,8 @@ signals: private: void LoadValuesFromConfig(); + void UpdateSettings(); + void ResetInstallFolders(); void InitializeEmulatorLanguages(); void OnLanguageChanged(int index); void OnCursorStateChanged(s16 index); diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 7ae583040..9eccec8ea 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -1159,7 +1159,7 @@ separateUpdatesCheckBox - Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management. + Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID. diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index f6b57436f..4b13844b8 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include @@ -68,6 +69,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3 WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_, std::string_view window_title) : width{width_}, height{height_}, controller{controller_} { + if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) { + UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError()); + } if (!SDL_Init(SDL_INIT_VIDEO)) { UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 23800fc49..e545e8e36 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include #include #include @@ -13,6 +12,7 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { @@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.VectorReg(); } else if constexpr (std::is_same_v) { return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } + UNREACHABLE(); } template @@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) { + switch (primitive) { + case AmdGpu::TessellationType::Isoline: + return spv::ExecutionMode::Isolines; + case AmdGpu::TessellationType::Triangle: + return spv::ExecutionMode::Triangles; + case AmdGpu::TessellationType::Quad: + return spv::ExecutionMode::Quads; + } + UNREACHABLE_MSG("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { + switch (spacing) { + case AmdGpu::TessellationPartitioning::Integer: + return spv::ExecutionMode::SpacingEqual; + case AmdGpu::TessellationPartitioning::FracOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case AmdGpu::TessellationPartitioning::FracEven: + return spv::ExecutionMode::SpacingFractionalEven; + default: + break; + } + UNREACHABLE_MSG("Tessellation spacing {}", spacing); +} + void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); @@ -222,6 +251,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + if (profile.supports_image_load_store_lod) { + ctx.AddExtension("SPV_AMD_shader_image_load_store_lod"); + ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD); + } } if (info.has_texel_buffers) { ctx.AddCapability(spv::Capability::SampledBuffer); @@ -244,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } - if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + const auto stage = info.l_stage; + if (stage == LogicalStage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.stage == Stage::Geometry) { + if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } -void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { - const auto& info = program.info; +void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.info.stage) { - case Stage::Compute: { + switch (info.l_stage) { + case LogicalStage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Stage::Export: - case Stage::Vertex: + case LogicalStage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; - case Stage::Fragment: + case LogicalStage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.hs_info.NumOutputControlPoints()); + break; + case LogicalStage::TessellationEval: { + execution_model = spv::ExecutionModel::TessellationEvaluation; + const auto& vs_info = ctx.runtime_info.vs_info; + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, + vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw + ? spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); + break; + } + case LogicalStage::Fragment: execution_model = spv::ExecutionModel::Fragment; if (ctx.profile.lower_left_origin_mode) { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); @@ -288,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; - case Stage::Geometry: + case LogicalStage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); ctx.AddExecutionMode(main, @@ -299,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.runtime_info.gs_info.num_invocations); break; default: - throw NotImplementedException("Stage {}", u32(program.info.stage)); + UNREACHABLE_MSG("Stage {}", u32(info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -345,7 +397,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); + DefineEntryPoint(program.info, ctx, main); SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 22b3523aa..611225e8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { void EmitBarrier(EmitContext& ctx) { const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Workgroup}; - const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | - spv::MemorySemanticsMask::WorkgroupMemory}; + spv::Scope memory; + spv::MemorySemanticsMask memory_semantics; + if (ctx.l_stage == Shader::LogicalStage::TessellationControl) { + memory = spv::Scope::Invocation; + memory_semantics = spv::MemorySemanticsMask::MaskNone; + } else { + memory = spv::Scope::Workgroup; + memory_semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory; + } ctx.OpControlBarrier(ctx.ConstU32(static_cast(execution)), ctx.ConstU32(static_cast(memory)), ctx.ConstU32(static_cast(memory_semantics))); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d005169c4..f3db6af56 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/runtime_info.h" #include @@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - ASSERT(info.num_components > 0); - if (info.num_components == 1) { - return info.id; + const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index), + ctx.ConstU32(element)); } else { - return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + const auto& info{ctx.output_params.at(attr_index)}; + ASSERT(info.num_components > 0); + if (info.num_components == 1) { + return info.id; + } else { + return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + } } } if (IR::IsMrt(attr)) { @@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - return {info.component_type, info.is_integer}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + return {ctx.F32[1], false}; + } else { + const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; + const auto& info{ctx.output_params.at(index)}; + return {info.component_type, info.is_integer}; + } } if (IR::IsMrt(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)}; @@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); } -Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { +Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { if (IR::IsPosition(attr)) { ASSERT(attr == IR::Attribute::Position0); const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; + const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; const auto param = ctx.input_params.at(param_id).id; const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u UNREACHABLE(); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.stage == Stage::Geometry) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { + if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); + } else if (ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval) { + if (IR::IsTessCoord(attr)) { + const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + const auto pointer{ + ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); } if (IR::IsParam(attr)) { @@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { } return coord; } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); default: - throw NotImplementedException("Read attribute {}", attr); + UNREACHABLE_MSG("Read attribute {}", attr); } } @@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. + // If Gcn shader uses this value, we should make sure all threads in the + // Vulkan shader use 0 + return ctx.ConstU32(0u); + } else { + const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); + } + } default: - throw NotImplementedException("Read U32 attribute {}", attr); + UNREACHABLE_MSG("Read U32 attribute {}", attr); } } @@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen } } +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array, + vertex_index, attr_index, comp_index)); +} + +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { + // Implied vertex index is invocation_id + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + Id pointer = + ctx.OpAccessChain(component_ptr, ctx.output_attr_array, + ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index); + ctx.OpStore(pointer, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32 + : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)); + default: + UNREACHABLE_MSG("Patch {}", u32(patch)); + } + }()}; + ctx.OpStore(pointer, value); +} + template static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e822eabef..a63be87e2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b)); +} + +Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b)); +} + Id EmitFPNeg16(EmitContext& ctx, Id value) { return ctx.OpFNegate(ctx.F16[1], value); } @@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) { return ctx.OpTrunc(ctx.F64[1], value); } -Id EmitFPFract(EmitContext& ctx, Id value) { +Id EmitFPFract32(EmitContext& ctx, Id value) { return ctx.OpFract(ctx.F32[1], value); } +Id EmitFPFract64(EmitContext& ctx, Id value) { + return ctx.OpFract(ctx.F64[1], value); +} + +Id EmitFPFrexpSig32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.F32[1], frexp, 0); +} + +Id EmitFPFrexpSig64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.F64[1], frexp, 0); +} + +Id EmitFPFrexpExp32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + +Id EmitFPFrexpExp64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fe2660705..e5d4f3077 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -130,8 +130,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.AddOffset(ctx, offset); const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref, operands.mask, operands.operands); const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample; @@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels; } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); @@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample; } -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) { UNREACHABLE_MSG("SPIR-V Instruction"); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id color_type = texture.data_types->Get(4); - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color)); + ImageOperands operands; + if (ctx.profile.supports_image_load_store_lod) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } else if (Sirit::ValidId(lod)) { + LOG_WARNING(Render, "Image write with LOD not supported by driver"); + } + ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, + operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index cc3db880c..f71c61af6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -9,6 +9,7 @@ namespace Shader::IR { enum class Attribute : u64; enum class ScalarReg : u32; +enum class Patch : u64; class Inst; class Value; } // namespace Shader::IR @@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); @@ -189,6 +192,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPNeg16(EmitContext& ctx, Id value); Id EmitFPNeg32(EmitContext& ctx, Id value); Id EmitFPNeg64(EmitContext& ctx, Id value); @@ -220,7 +225,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value); Id EmitFPTrunc16(EmitContext& ctx, Id value); Id EmitFPTrunc32(EmitContext& ctx, Id value); Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPFract(EmitContext& ctx, Id value); +Id EmitFPFract32(EmitContext& ctx, Id value); +Id EmitFPFract64(EmitContext& ctx, Id value); +Id EmitFPFrexpSig32(EmitContext& ctx, Id value); +Id EmitFPFrexpSig64(EmitContext& ctx, Id value); +Id EmitFPFrexpExp32(EmitContext& ctx, Id value); +Id EmitFPFrexpExp64(EmitContext& ctx, Id value); Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); @@ -385,14 +395,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color); Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5c7278c6b..255a3e2b2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/ir/passes/srt.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/types.h" #include @@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) { case Stage::Compute: return "cs"; } - throw InvalidArgument("Invalid stage {}", u32(stage)); + UNREACHABLE_MSG("Invalid hw stage {}", u32(stage)); } static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { @@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, - profile{profile_}, stage{info.stage}, binding{binding_} { + profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -147,6 +148,10 @@ void EmitContext::DefineArithmeticTypes() { full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2"); full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2"); + frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32"); + if (info.uses_fp64) { + frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64"); + } } void EmitContext::DefineInterfaces() { @@ -264,9 +269,8 @@ void EmitContext::DefineInputs() { U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); @@ -290,7 +294,7 @@ void EmitContext::DefineInputs() { }); // Note that we pass index rather than Id input_params[attrib.semantic] = SpirvAttribute{ - .id = rate_idx, + .id = {rate_idx}, .pointer_type = input_u32, .component_type = U32[1], .num_components = std::min(attrib.num_elements, num_components), @@ -307,12 +311,11 @@ void EmitContext::DefineInputs() { } input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); - interfaces.push_back(id); } } break; } - case Stage::Fragment: + case LogicalStage::Fragment: frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -347,15 +350,14 @@ void EmitContext::DefineInputs() { } input_params[semantic] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); - interfaces.push_back(attr_id); } break; - case Stage::Compute: + case LogicalStage::Compute: workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input); local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; - case Stage::Geometry: { + case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), @@ -379,9 +381,50 @@ void EmitContext::DefineInputs() { for (int param_id = 0; param_id < num_params; ++param_id) { const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; - Name(id, fmt::format("in_attr{}", param_id)); + Name(id, fmt::format("gs_in_attr{}", param_id)); input_params[param_id] = {id, input_f32, F32[1], 4}; - interfaces.push_back(id); + } + break; + } + case LogicalStage::TessellationControl: { + invocation_id = + DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + break; + } + case LogicalStage::TessellationEval: { + tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + + u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineInput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_in{}", index)); + patches[index] = id; } break; } @@ -391,9 +434,81 @@ void EmitContext::DefineInputs() { } void EmitContext::DefineOutputs() { - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { + // No point in defining builtin outputs (i.e. position) unless next stage is fragment? + // Might cause problems linking with tcs + + output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || + info.stores.Get(IR::Attribute::Position2) || + info.stores.Get(IR::Attribute::Position3); + if (has_extra_pos_stores) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + clip_distances = + DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output); + cull_distances = + DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); + } + if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) { + const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id type{TypeArray(F32[4], ConstU32(num_attrs))}; + output_attr_array = DefineOutput(type, 0); + Name(output_attr_array, "out_attrs"); + } + } else { + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, fmt::format("out_attr{}", i)); + output_params[i] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); + } + } + break; + } + case LogicalStage::TessellationControl: { + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], ConstU32(4U))}; + output_tess_level_outer = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], ConstU32(2U))}; + output_tess_level_inner = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + + const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray( + per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; + output_attr_array = DefineOutput(patch_array_type, 0); + Name(output_attr_array, "out_attrs"); + } + + u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineOutput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_out{}", index)); + patches[index] = id; + } + break; + } + case LogicalStage::TessellationEval: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || info.stores.Get(IR::Attribute::Position2) || @@ -415,11 +530,10 @@ void EmitContext::DefineOutputs() { Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); - interfaces.push_back(id); } break; } - case Stage::Fragment: + case LogicalStage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -431,22 +545,22 @@ void EmitContext::DefineOutputs() { const Id id{DefineOutput(type, i)}; Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); - interfaces.push_back(id); } break; - case Stage::Geometry: { + case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; - interfaces.push_back(id); } break; } - default: + case LogicalStage::Compute: break; + default: + UNREACHABLE(); } } @@ -582,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { return spv::ImageFormat::R32ui; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) { + return spv::ImageFormat::R32i; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::R32f; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4e5e7dd3b..583d96b99 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -46,14 +46,18 @@ public: void DefineBufferOffsets(); void DefineInterpolatedAttribs(); - [[nodiscard]] Id DefineInput(Id type, u32 location) { - const Id input_id{DefineVar(type, spv::StorageClass::Input)}; - Decorate(input_id, spv::Decoration::Location, location); + [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)}; + if (location) { + Decorate(input_id, spv::Decoration::Location, *location); + } return input_id; } - [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt) { - const Id output_id{DefineVar(type, spv::StorageClass::Output)}; + [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)}; if (location) { Decorate(output_id, spv::Decoration::Location, *location); } @@ -131,7 +135,8 @@ public: const Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - Stage stage{}; + Stage stage; + LogicalStage l_stage{}; Id void_id{}; Id U8{}; @@ -148,6 +153,8 @@ public: Id full_result_i32x2; Id full_result_u32x2; + Id frexp_result_f32; + Id frexp_result_f64; Id pi_x2; @@ -186,8 +193,15 @@ public: Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + Id tess_coord; + std::array patches{}; + Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; @@ -250,6 +264,8 @@ public: bool is_loaded{}; s32 buffer_handle{-1}; }; + Id input_attr_array; + Id output_attr_array; std::array input_params{}; std::array output_params{}; std::array frag_outputs{}; diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 8c3122b28..0816ec088 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,6 +80,8 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); + // Emit this label so that the block ends with s_branch instruction + AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/shader_recompiler/frontend/tessellation.h b/src/shader_recompiler/frontend/tessellation.h new file mode 100644 index 000000000..bfcaa4fdc --- /dev/null +++ b/src/shader_recompiler/frontend/tessellation.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Shader { + +struct TessellationDataConstantBuffer { + u32 ls_stride; + u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride : + // ls_stride + u32 num_patches; // num patches submitted in threadgroup + u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ? + // HullStateConstants::m_numInputCP * ls_stride * num_patches : 0 + // basically 0 when passthrough + u32 patch_const_size; // 16 * num_patch_attrs + u32 patch_const_base; // hs_output_base + patch_output_size + u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch + f32 off_chip_tessellation_factor_threshold; + u32 first_edge_tess_factor_index; +}; + +// Assign names to dword fields of TessellationDataConstantBuffer +enum class TessConstantAttribute : u32 { + LsStride, + HsCpStride, + HsNumPatch, + HsOutputBase, + PatchConstSize, + PatchConstBase, + PatchOutputSize, + OffChipTessellationFactorThreshold, + FirstEdgeTessFactorIndex, +}; + +} // namespace Shader \ No newline at end of file diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 5914f9fe3..116935b94 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index f82f8fc1b..5927aa696 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; + if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) { + LOG_WARNING(Render_Vulkan, "Unsupported depth export"); + return; + } + const std::array vsrc = { IR::VectorReg(inst.src[0].code), IR::VectorReg(inst.src[1].code), diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83e..f96fd0f40 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE(inst, true); case Opcode::S_BFE_U32: - return S_BFE_U32(inst); + return S_BFE(inst, false); case Opcode::S_ABSDIFF_I32: return S_ABSDIFF_I32(inst); @@ -94,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { break; case Opcode::S_BREV_B32: return S_BREV_B32(inst); - case Opcode::S_BCNT1_I32_B64: - return S_BCNT1_I32_B64(inst); + case Opcode::S_BCNT1_I32_B32: + return S_BCNT1_I32_B32(inst); case Opcode::S_FF1_I32_B32: return S_FF1_I32_B32(inst); case Opcode::S_AND_SAVEEXEC_B64: @@ -157,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) { switch (inst.opcode) { // SOPK case Opcode::S_MOVK_I32: - return S_MOVK(inst); - + return S_MOVK(inst, false); + case Opcode::S_CMOVK_I32: + return S_MOVK(inst, true); case Opcode::S_CMPK_EQ_I32: return S_CMPK(ConditionOp::EQ, true, inst); case Opcode::S_CMPK_LG_I32: @@ -434,12 +439,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } -void Translator::S_BFE_U32(const GcnInst& inst) { +void Translator::S_BFE(const GcnInst& inst, bool is_signed) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; - const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)}; SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } @@ -454,13 +459,16 @@ void Translator::S_ABSDIFF_I32(const GcnInst& inst) { // SOPK -void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm; - if (simm16 & (1 << 15)) { - // TODO: need to verify the case of imm sign extension - UNREACHABLE(); +void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) { + const s16 simm16 = inst.control.sopk.simm; + // do the sign extension + const s32 simm32 = static_cast(simm16); + IR::U32 val = ir.Imm32(simm32); + if (is_conditional) { + // if !SCC its a NOP + val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))}; } - SetDst(inst.dst[0], ir.Imm32(simm16)); + SetDst(inst.dst[0], val); } void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) { @@ -571,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { +void Translator::S_BCNT1_I32_B32(const GcnInst& inst) { const IR::U32 result = ir.BitCount(GetSrc(inst.src[0])); SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); @@ -594,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in return ir.GetVcc(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::ExecLo: + return ir.GetExec(); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 97978ff6b..a14bff706 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -34,9 +36,8 @@ void Translator::EmitPrologue() { } IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (info.stage) { - case Stage::Vertex: - case Stage::Export: + switch (info.l_stage) { + case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -52,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -122,7 +123,30 @@ void Translator::EmitPrologue() { } } break; - case Stage::Compute: + case LogicalStage::TessellationControl: { + // Should be laid out like: + // [0:8]: patch id within VGT + // [8:12]: output control point id + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // TODO PrimitiveId is probably V2 but haven't seen it yet + break; + } + case LogicalStage::TessellationEval: + ir.SetVectorReg(IR::VectorReg::V0, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); + // V2 is similar to PrimitiveID but not the same. It seems to only be used in + // compiler-generated address calculations. Its probably the patch id within the + // patches running locally on a given VGT (or CU, whichever is the granularity of LDS + // memory) + // Set to 0. See explanation in comment describing hull/domain passes + ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); + // V3 is the actual PrimitiveID as intended by the shader author. + ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); @@ -137,7 +161,7 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; - case Stage::Geometry: + case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 @@ -152,7 +176,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: - throw NotImplementedException("Unknown shader stage"); + UNREACHABLE_MSG("Unknown shader stage"); } } @@ -415,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); return ir.SetVectorReg(IR::VectorReg(operand.code), lo); case OperandField::VccLo: - UNREACHABLE(); + ir.SetVccLo(lo); + return ir.SetVccHi(hi); case OperandField::VccHi: UNREACHABLE(); case OperandField::M0: @@ -503,7 +528,8 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || + info.stage == Stage::Local); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 43f3ccef2..218b66d74 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -94,12 +94,13 @@ public: void S_ASHR_I32(const GcnInst& inst); void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); - void S_BFE_U32(const GcnInst& inst); + void S_BFE(const GcnInst& inst, bool is_signed); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); // SOPK - void S_MOVK(const GcnInst& inst); + void S_MOVK(const GcnInst& inst, bool is_conditional); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst); @@ -109,7 +110,7 @@ public: void S_MOV_B64(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); - void S_BCNT1_I32_B64(const GcnInst& inst); + void S_BCNT1_I32_B32(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); @@ -200,6 +201,11 @@ public: void V_BFREV_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); void V_FFBL_B32(const GcnInst& inst); + void V_FREXP_EXP_I32_F64(const GcnInst& inst); + void V_FREXP_MANT_F64(const GcnInst& inst); + void V_FRACT_F64(const GcnInst& inst); + void V_FREXP_EXP_I32_F32(const GcnInst& inst); + void V_FREXP_MANT_F32(const GcnInst& inst); void V_MOVRELD_B32(const GcnInst& inst); void V_MOVRELS_B32(const GcnInst& inst); void V_MOVRELSD_B32(const GcnInst& inst); @@ -212,7 +218,7 @@ public: // VOP3a void V_MAD_F32(const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); void V_MAD_U32_U24(const GcnInst& inst); void V_CUBEID_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); @@ -271,7 +277,7 @@ public: // Image Memory // MIMG void IMAGE_LOAD(bool has_mip, const GcnInst& inst); - void IMAGE_STORE(const GcnInst& inst); + void IMAGE_STORE(bool has_mip, const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 8149230db..2b32ca2ce 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_FFBH_U32(inst); case Opcode::V_FFBL_B32: return V_FFBL_B32(inst); + case Opcode::V_FREXP_EXP_I32_F64: + return V_FREXP_EXP_I32_F64(inst); + case Opcode::V_FREXP_MANT_F64: + return V_FREXP_MANT_F64(inst); + case Opcode::V_FRACT_F64: + return V_FRACT_F64(inst); + case Opcode::V_FREXP_EXP_I32_F32: + return V_FREXP_EXP_I32_F32(inst); + case Opcode::V_FREXP_MANT_F32: + return V_FREXP_MANT_F32(inst); case Opcode::V_MOVRELD_B32: return V_MOVRELD_B32(inst); case Opcode::V_MOVRELS_B32: @@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { void Translator::V_FRACT_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.Fract(src0)); + SetDst(inst.dst[0], ir.FPFract(src0)); } void Translator::V_TRUNC_F32(const GcnInst& inst) { @@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FindILsb(src0)); } +void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFrexpSig(src0)); +} + +void Translator::V_FRACT_F64(const GcnInst& inst) { + const IR::F32 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFract(src0)); +} + +void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpSig(src0)); +} + void Translator::V_MOVRELD_B32(const GcnInst& inst) { const IR::U32 src_val{GetSrc(inst.src[0])}; u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); @@ -1025,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + IR::U32 src1{GetSrc(inst.src[1])}; + IR::U32 src2{GetSrc(inst.src[2])}; + if (!src1.IsImmediate()) { + src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F)); + } + if (!src2.IsImmediate()) { + src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F)); + } SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index b7ad3b36b..072b1f88e 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer store operations case Opcode::IMAGE_STORE: - return IMAGE_STORE(inst); + return IMAGE_STORE(false, inst); + case Opcode::IMAGE_STORE_MIP: + return IMAGE_STORE(true, inst); // Image misc operations case Opcode::IMAGE_GET_RESINFO: @@ -187,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -245,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; - if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) { ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); @@ -267,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -423,7 +431,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { } } -void Translator::IMAGE_STORE(const GcnInst& inst) { +void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg data_reg{inst.dst[0].code}; @@ -434,6 +442,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + IR::TextureInstInfo info{}; + info.has_lod.Assign(has_mip); + boost::container::static_vector comps; for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { @@ -443,7 +454,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { comps.push_back(ir.GetVectorReg(data_reg++)); } const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); - ir.ImageWrite(handle, body, value, {}); + ir.ImageWrite(handle, body, {}, value, info); } void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { @@ -527,6 +538,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.has_lod.Assign(flags.any(MimgModifier::Lod)); info.is_array.Assign(mimg.da); + info.is_unnormalized.Assign(mimg.unrm); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 494bbb4bb..dbea2af8a 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -11,6 +11,7 @@ #include "common/types.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/copy_shader.h" +#include "shader_recompiler/frontend/tessellation.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/reg.h" @@ -163,6 +164,7 @@ struct Info { UserDataMask ud_mask{}; CopyShaderData gs_copy_data; + u32 uses_patches{}; BufferResourceList buffers; TextureBufferResourceList texture_buffers; @@ -173,8 +175,12 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + s32 tess_consts_dword_offset = -1; + std::span user_data; Stage stage; + LogicalStage l_stage; u64 pgm_hash{}; VAddr pgm_base; @@ -190,14 +196,16 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; - explicit Info(Stage stage_, ShaderParams params) - : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -244,6 +252,16 @@ struct Info { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } } + + void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const { + ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD + auto buf = ReadUdReg(static_cast(tess_consts_ptr_base), + static_cast(tess_consts_dword_offset)); + VAddr tess_constants_addr = buf.base_address; + memcpy(&tess_constants, + reinterpret_cast(tess_constants_addr), + sizeof(tess_constants)); + } }; constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e219dfb64..6a267e21b 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PatchVertices: + return "PatchVertices"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPointU"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPointV"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0890e88f1..bcb2b44a9 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,8 +72,13 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PatchVertices = 81, + TessellationEvaluationPointU = 82, + TessellationEvaluationPointV = 83, + PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID Max, }; @@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept { return attribute >= Attribute::Position0 && attribute <= Attribute::Position3; } +constexpr bool IsTessCoord(Attribute attribute) noexcept { + return attribute >= Attribute::TessellationEvaluationPointU && + attribute <= Attribute::TessellationEvaluationPointV; +} + constexpr bool IsParam(Attribute attribute) noexcept { return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; } diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index b4d1a78c7..a312eabde 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map& inst_to_index, size return fmt::format("{}", arg.VectorReg()); case Type::Attribute: return fmt::format("{}", arg.Attribute()); + case Type::Patch: + return fmt::format("{}", arg.Patch()); default: return ""; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 78e7f2289..21df53391 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), index); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { @@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); } +F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index); +} + +void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, + const U32& comp_index) { + Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { case 32: @@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } } +Value IREmitter::CompositeConstruct(std::span elements) { + switch (elements.size()) { + case 2: + return CompositeConstruct(elements[0], elements[1]); + case 3: + return CompositeConstruct(elements[0], elements[1], elements[2]); + case 4: + return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]); + default: + UNREACHABLE_MSG("Composite construct with greater than 4 elements"); + } +} + Value IREmitter::CompositeExtract(const Value& vector, size_t element) { const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { @@ -692,6 +723,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) { } } +F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPDiv32, a, b); + case Type::F64: + return Inst(Opcode::FPDiv64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) { if (a.Type() != b.Type() || a.Type() != c.Type()) { UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); @@ -855,8 +900,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) { } } -F32 IREmitter::Fract(const F32& value) { - return Inst(Opcode::FPFract, value); +F32F64 IREmitter::FPFract(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFract32, value); + case Type::F64: + return Inst(Opcode::FPFract64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPFrexpSig(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpSig32, value); + case Type::F64: + return Inst(Opcode::FPFrexpSig64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::FPFrexpExp(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpExp32, value); + case Type::F64: + return Inst(Opcode::FPFrexpExp64, value); + default: + ThrowInvalidType(value.Type()); + } } U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { @@ -1556,9 +1630,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } -Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info) { - return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling); +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, TextureInstInfo info) { + return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, lod, offset, multisampling); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, @@ -1582,13 +1656,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, offset, lod_clamp); } -Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { - return Inst(Opcode::ImageRead, Flags{info}, handle, coords); +Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info) { + return Inst(Opcode::ImageRead, Flags{info}, handle, coords, lod); } -void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { - Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod, + const Value& color, TextureInstInfo info) { + Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color); } // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index cbd3780de..95713565b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -10,6 +10,7 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -80,10 +81,18 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, + IR::Value index = IR::Value(u32(0u))); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); + [[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); @@ -138,6 +147,8 @@ public: [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); + [[nodiscard]] Value CompositeConstruct(std::span values); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); @@ -158,6 +169,7 @@ public: [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b); + [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c); [[nodiscard]] F32F64 FPAbs(const F32F64& value); @@ -179,7 +191,9 @@ public: [[nodiscard]] F32F64 FPFloor(const F32F64& value); [[nodiscard]] F32F64 FPCeil(const F32F64& value); [[nodiscard]] F32F64 FPTrunc(const F32F64& value); - [[nodiscard]] F32 Fract(const F32& value); + [[nodiscard]] F32F64 FPFract(const F32F64& value); + [[nodiscard]] F32F64 FPFrexpSig(const F32F64& value); + [[nodiscard]] U32 FPFrexpExp(const F32F64& value); [[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); @@ -311,14 +325,16 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, + TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivatives_dx, const Value& derivatives_dy, const Value& offset, const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); - void ImageWrite(const Value& handle, const Value& coords, const Value& color, + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info); + void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color, TextureInstInfo info); void EmitVertex(); @@ -330,6 +346,7 @@ private: template T Inst(Opcode op, Args... args) { auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + it->SetParent(block); return T{Value{&*it}}; } @@ -347,6 +364,7 @@ private: u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + it->SetParent(block); return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 9b4ad63d2..6e7bbe661 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: + case Opcode::SetTcsGenericAttribute: + case Opcode::SetPatch: case Opcode::StoreBufferU32: case Opcode::StoreBufferU32x2: case Opcode::StoreBufferU32x3: diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index be640297a..cd73ace7e 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque}; constexpr Type ScalarReg{Type::ScalarReg}; constexpr Type VectorReg{Type::VectorReg}; constexpr Type Attribute{Type::Attribute}; -constexpr Type SystemValue{Type::SystemValue}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 0283ccd0f..470f9fbe5 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32, OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) +OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) // Flags OPCODE(GetScc, U1, Void, ) @@ -184,6 +188,8 @@ OPCODE(FPMin32, F32, F32, OPCODE(FPMin64, F64, F64, F64, ) OPCODE(FPMul32, F32, F32, F32, ) OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPDiv32, F32, F32, F32, ) +OPCODE(FPDiv64, F64, F64, F64, ) OPCODE(FPNeg32, F32, F32, ) OPCODE(FPNeg64, F64, F64, ) OPCODE(FPRecip32, F32, F32, ) @@ -208,7 +214,12 @@ OPCODE(FPCeil32, F32, F32, OPCODE(FPCeil64, F64, F64, ) OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPFract, F32, F32, ) +OPCODE(FPFract32, F32, F32, ) +OPCODE(FPFract64, F64, F64, ) +OPCODE(FPFrexpSig32, F32, F32, ) +OPCODE(FPFrexpSig64, F64, F64, ) +OPCODE(FPFrexpExp32, U32, F32, ) +OPCODE(FPFrexpExp64, U32, F64, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) @@ -327,12 +338,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 9624ce6a5..16b07e1a1 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } } +template +void FoldMul(IR::Block& block, IR::Inst& inst) { + if (!FoldCommutative(inst, [](T a, T b) { return a * b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg(rhs) == 0) { + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + return; + } +} + void FoldCmpClass(IR::Block& block, IR::Inst& inst) { ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); const auto class_mask = static_cast(inst.Arg(1).U32()); @@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { FoldWhenAllImmediates(inst, [](u32 a) { return static_cast(a); }); return; case IR::Opcode::IMul32: - FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + FoldMul(block, inst); + return; + case IR::Opcode::UDiv32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0"); + return a / b; + }); + return; + case IR::Opcode::UMod32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0"); + return a % b; + }); return; case IR::Opcode::FPCmpClass32: FoldCmpClass(block, inst); diff --git a/src/shader_recompiler/ir/passes/constant_propogation.h b/src/shader_recompiler/ir/passes/constant_propogation.h new file mode 100644 index 000000000..313a3cc6a --- /dev/null +++ b/src/shader_recompiler/ir/passes/constant_propogation.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp new file mode 100644 index 000000000..895c9823e --- /dev/null +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -0,0 +1,746 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/pattern_matching.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Optimization { + +/** + * Tessellation shaders pass outputs to the next shader using LDS. + * The Hull shader stage receives input control points stored in LDS. + * + * These passes attempt to resolve LDS accesses to attribute accesses and correctly + * write to the tessellation factor tables. + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 + * - ... + * - TCS outputs for patch 0 + * - TCS outputs for patch 1 + * - TCS outputs for patch 2 + * - ... + * - PatchConst TCS outputs for patch 0 + * - PatchConst TCS outputs for patch 1 + * - PatchConst TCS outputs for patch 2 + * + * + * If the Hull stage does not write any new control points the driver will + * optimize LDS layout so input and output control point spaces overlap. + * (Passthrough) + * + * The gnm driver requires a V# holding special constants to be bound + * for reads by the shader. + * The Hull and Domain shaders read values from this buffer which + * contain size and offset information required to address input, output, + * or PatchConst attributes within the current patch. + * See the TessellationDataConstantBuffer struct to see the layout of this V#. + * + * Tessellation factors are stored to a special tessellation factor V# that is automatically bound + * by the driver. This is the input to the fixed function tessellator that actually subdivides the + * domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull + * shader. + * The offset into the tess factor buffer determines which factor the shader is writing. + * Additionally, most hull shaders seem to redundantly write tess factors to PatchConst + * attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes. + * + * LDS reads in the Hull shader can be from input control points, and in the the Domain shader can + * be hs output control points (output from the perspective of the Hull shader) and patchconst + * values. + * LDS stores in the Hull shader can either be output control point writes or per-patch + * (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless its + * followed by the geometry stage (but we havent seen this yet), so nothing special there. + * The address calculations can vary significantly and can't be easily pattern matched. We are at + * the mercy of instruction selection the ps4 compiler wanted to use. + * Generally though, they could look something like this: + * Input control point: + * addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride + * + attr# * 16 + component + * Output control point: + * addr = #patches * input_cp_stride * #input_cp_per_patch + * + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride + + attr# * 16 + component + * Per patch output: + * addr = #patches * input_cp_stride * #cp_per_input_patch + * + #patches * output_patch_stride + * + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component + * + * output_patch_stride and output_cp_stride are usually compile time constants in the gcn + * + * Hull shaders can probably also read output control points corresponding to other threads, like + * shared memory (but we havent seen this yet). + * ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens. + * They should also be able to read PatchConst values, + * although not sure if this happens in practice. + * + * To determine which type of attribute (input, output, patchconst) we the check the users of + * TessConstants V# reads to deduce which type of attribute a given load/store to LDS + * is touching. + * + * In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control + * point id (InvocationId) are packed in VGPR1 by the driver like + * V1 = InvocationId << 8 | PatchIdInVgt + * The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine + * which is which. + * + * This pass does not attempt to deduce the exact attribute referenced in a LDS load/store. + * Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's, + * TES in's, and PatchConst in/out's. + * + * TCS/TES Input attributes: + * We define input attributes using an array in the shader roughly like this: + * // equivalent GLSL in TCS + * layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES]; + * + * Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#. + * We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes. + * For TES, the number of attributes comes from hs_cp_stride / 16. + * The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input + * patch (for Hull) or the hs output patch (for Domain). + * + * For input reads in TCS or TES, we emit SPIR-V like: + * float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2]; + * + * For output writes, we assume the control point index is InvocationId, since high level languages + * impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks + * like this: + * layout (location = 0) in vec4 in_attrs[][NUM_OUTPUT_ATTRIBUTES]; + * out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value; + * + * NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs + * variable. + * + * Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole + * which contains the attributes from many patches. On the other hand, higher level shading + * languages restrict attribute access to the patch of the current thread, which is naturally a + * restriction in SPIR-V also. + * The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many + * patches' attributes are just implementation details of the ps4 driver/compiler. To deal with + * this, we can replace certain TessConstant V# reads with 0, which only contribute to the base + * address of the current patch's attributes in LDS and not the indexes within the local patch. + * + * (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing + * attributes to buffers and not caring about whether they are hs input, hs output, or patchconst + * attributes) + * + */ + +namespace { + +using namespace Shader::Optimiation::PatternMatching; + +static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset, + Shader::Info& info, Shader::RuntimeInfo& runtime_info, + TessellationDataConstantBuffer& tess_constants) { + info.tess_consts_ptr_base = sharp_ptr_base; + info.tess_consts_dword_offset = sharp_dword_offset; + info.ReadTessConstantBuffer(tess_constants); + if (info.l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + + return; +} + +struct TessSharpLocation { + IR::ScalarReg ptr_base; + u32 dword_off; +}; + +std::optional FindTessConstantSharp(IR::Inst* read_const_buffer) { + IR::Value sharp_ptr_base; + IR::Value sharp_dword_offset; + + IR::Value rv = IR::Value{read_const_buffer}; + IR::Value handle = read_const_buffer->Arg(0); + + if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(), + MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = IR::ScalarReg::Max, + .dword_off = static_cast(sharp_dword_offset.ScalarReg())}; + } else if (M_COMPOSITECONSTRUCTU32X4( + M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)), + MatchIgnore()), + MatchImm(sharp_dword_offset)), + MatchIgnore(), MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(), + .dword_off = sharp_dword_offset.U32()}; + } + return {}; +} + +// Walker that helps deduce what type of attribute a DS instruction is reading +// or writing, which could be an input control point, output control point, +// or per-patch constant (PatchConst). +// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users +// recursively and increment a counter on the Load/WriteShared users. +// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase), +// and PatchConstBase (m_patchConstBase). +// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch +// is used as an addend to skip the region for input control points, and similarly +// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region +// for output control points. +// +// TODO: this will break if AMD compiler used distributive property like +// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch) +class TessConstantUseWalker { +public: + void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) { + u32 inc; + switch (attr) { + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + inc = 1; + break; + case TessConstantAttribute::PatchConstBase: + inc = 2; + break; + default: + UNREACHABLE(); + } + + for (IR::Use use : read_const_buffer->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + + ++seq_num; + } + +private: + void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) { + IR::Inst* inst = use.user; + + switch (use.user->GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + u32 counter = inst->Flags(); + inst->SetFlags(counter + inc); + // Stop here + return; + } + case IR::Opcode::Phi: { + struct PhiCounter { + u16 seq_num; + u8 unique_edge; + u8 counter; + }; + + PhiCounter count = inst->Flags(); + ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand); + // the point of seq_num is to tell us if we've already traversed this + // phi on the current walk. Alternatively we could keep a set of phi's + // seen on the current walk. This is to handle phi cycles + if (count.seq_num == 0) { + // First time we've encountered this phi + count.seq_num = seq_num; + // Mark the phi as having been traversed originally through this edge + count.unique_edge = use.operand; + count.counter = inc; + } else if (count.seq_num < seq_num) { + count.seq_num = seq_num; + // For now, assume we are visiting this phi via the same edge + // as on other walks. If not, some dataflow analysis might be necessary + ASSERT(count.unique_edge == use.operand); + count.counter += inc; + } else { + // count.seq_num == seq_num + // there's a cycle, and we've already been here on this walk + return; + } + inst->SetFlags(count); + break; + } + default: + break; + } + + for (IR::Use use : inst->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + } + + u32 seq_num{1u}; +}; + +enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst }; + +static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info, + const Shader::RuntimeInfo& runtime_info) { + u32 count = ring_access->Flags(); + if (count == 0) { + return AttributeRegion::InputCP; + } else if (info.l_stage == LogicalStage::TessellationControl && + runtime_info.hs_info.IsPassthrough()) { + ASSERT(count <= 1); + return AttributeRegion::PatchConst; + } else { + ASSERT(count <= 2); + return AttributeRegion(count); + } +} + +static bool IsDivisibleByStride(IR::Value term, u32 stride) { + IR::Value a, b; + if (MatchU32(stride).Match(term)) { + return true; + } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) || + M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) { + return IsDivisibleByStride(a, stride); + } else if (M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) { + return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride); + } + return false; +} + +// Return true if we can eliminate any addends +static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector& addends) { + IR::Value a, b; + if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) { + bool ret = false; + ret = TryOptimizeAddendInModulo(a, stride, addends); + ret |= TryOptimizeAddendInModulo(b, stride, addends); + return ret; + } else if (!IsDivisibleByStride(addend, stride)) { + addends.push_back(IR::U32{addend}); + return false; + } else { + return true; + } +} + +// In calculation (a + b + ...) % stride +// Use this fact +// (a + b) mod N = (a mod N + b mod N) mod N +// If any addend is divisible by stride, then we can replace it with 0 in the attribute +// or component index calculation +static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) { + std::vector addends; + if (TryOptimizeAddendInModulo(addr, stride, addends)) { + addr = ir.Imm32(0); + for (auto& addend : addends) { + addr = ir.IAdd(addr, addend); + } + } + return addr; +} + +// TODO: can optimize div in control point index similarly to mod + +// Read a TCS input (InputCP region) or TES input (OutputCP region) +static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride)); + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = + ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = + ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); +} + +} // namespace + +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + const Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: { + const auto info = inst.Flags(); + if (!info.globally_coherent) { + break; + } + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto GetValue = [&](IR::Value data) -> IR::F32 { + if (auto* inst = data.TryInstRecursive(); + inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) { + return IR::F32{inst->Arg(0)}; + } + return ir.BitCast(IR::U32{data}); + }; + const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1; + IR::U32 index = IR::U32{inst.Arg(1)}; + ASSERT(index.IsImmediate()); + const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2; + + const IR::Value data = inst.Arg(2); + auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch { + // The hull outputs tess factors in different formats depending on the shader. + // For triangle domains, it seems to pack the entries into 4 consecutive floats, + // with the 3 edge factors followed by the 1 interior factor. + // For quads, it does 4 edge factors then 2 interior. + // There is a tess factor stride member of the GNMX hull constants struct in + // a hull program shader binary archive, but this doesn't seem to be + // communicated to the driver. + // The layout seems to be implied by the type of the abstract domain. + switch (runtime_info.hs_info.tess_type) { + case AmdGpu::TessellationType::Isoline: + ASSERT(gcn_factor_idx < 2); + return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Triangle: + ASSERT(gcn_factor_idx < 4); + if (gcn_factor_idx == 3) { + return IR::Patch::TessellationLodInteriorU; + } + return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); + default: + UNREACHABLE(); + } + }; + + inst.Invalidate(); + if (num_dwords == 1) { + ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data)); + break; + } + auto* inst = data.TryInstRecursive(); + ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4)); + for (s32 i = 0; i < num_dwords; i++) { + ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i))); + } + break; + } + + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 + ? 1 + : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4); + const IR::U32 addr{inst.Arg(0)}; + const IR::U32 data{inst.Arg(1).Resolve()}; + + const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind, + u32 off_dw) { + const IR::F32 data_component = ir.BitCast(value); + + if (output_kind == AttributeRegion::OutputCP) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + u32 stride = runtime_info.hs_info.hs_output_cp_stride; + // Invocation ID array index is implicit, handled by SPIRV backend + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = ir.ShiftRightLogical( + ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = ir.ShiftRightLogical( + ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + ir.SetTcsGenericAttribute(data_component, attr_index, comp_index); + } else { + ASSERT(output_kind == AttributeRegion::PatchConst); + ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}", + fmt::ptr(addr.Inst())); + ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component); + } + }; + + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + if (num_dwords == 1) { + SetOutput(addr, data, region, 0); + } else { + for (auto i = 0; i < num_dwords; i++) { + SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i); + } + } + inst.Invalidate(); + break; + } + + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); + ASSERT_MSG(region == AttributeRegion::InputCP, + "Unhandled read of output or patchconst attribute in hull shader"); + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast( + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + + default: + break; + } + } + } + + if (runtime_info.hs_info.IsPassthrough()) { + // Copy input attributes to output attributes, indexed by InvocationID + // Passthrough should imply that input and output patches have same number of vertices + IR::Block* entry_block = *program.blocks.begin(); + auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Prologue; + }); + ASSERT(it != entry_block->end()); + ++it; + ASSERT(it != entry_block->end()); + ++it; + // Prologue + // SetExec #true + // <- insert here + // ... + IR::IREmitter ir{*entry_block, it}; + + ASSERT(runtime_info.hs_info.ls_stride % 16 == 0); + u32 num_attributes = runtime_info.hs_info.ls_stride / 16; + const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId); + for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) { + for (u32 comp = 0; comp < 4; comp++) { + IR::F32 attr_read = + ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp)); + // InvocationId is implicit index for output control point writes + ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp)); + } + } + // We could wrap the rest of the program in an if stmt + // CopyInputAttrsToOutputs(); // psuedocode + // if (InvocationId == 0) { + // PatchConstFunction(); + // } + // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally + } +} + +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto opcode = inst.GetOpcode(); + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); + const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { + if (region == AttributeRegion::OutputCP) { + return ReadTessInputComponent( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + } else { + ASSERT(region == AttributeRegion::PatchConst); + return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); + } + }; + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast(GetInput(addr, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = GetInput(addr, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + default: + break; + } + } + } +} + +// Run before either hull or domain transform +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { + TessellationDataConstantBuffer tess_constants; + Shader::Info& info = program.info; + // Find the TessellationDataConstantBuffer V# + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + auto found_tess_consts_sharp = [&]() -> bool { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::Value addr = inst.Arg(0); + auto read_const_buffer = IR::BreadthFirstSearch( + addr, [](IR::Inst* maybe_tess_const) -> std::optional { + if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) { + return maybe_tess_const; + } + return std::nullopt; + }); + if (read_const_buffer) { + auto sharp_location = FindTessConstantSharp(read_const_buffer.value()); + if (sharp_location) { + if (info.tess_consts_dword_offset >= 0) { + // Its possible theres a readconstbuffer that contributes to an + // LDS address and isnt a TessConstant V# read. Could improve on + // this somehow + ASSERT_MSG(static_cast(sharp_location->dword_off) == + info.tess_consts_dword_offset && + sharp_location->ptr_base == + info.tess_consts_ptr_base, + "TessConstants V# is ambiguous"); + } + InitTessConstants(sharp_location->ptr_base, + static_cast(sharp_location->dword_off), info, + runtime_info, tess_constants); + return true; + } + UNREACHABLE_MSG("Failed to match tess constant sharp"); + } + return false; + } + default: + return false; + } + }(); + + if (found_tess_consts_sharp) { + break; + } + } + } + + ASSERT(info.tess_consts_dword_offset >= 0); + + TessConstantUseWalker walker; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + auto sharp_location = FindTessConstantSharp(&inst); + if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && + sharp_location->dword_off == info.tess_consts_dword_offset) { + // The shader is reading from the TessConstants V# + IR::Value index = inst.Arg(1); + + ASSERT_MSG(index.IsImmediate(), + "Tessellation constant read with dynamic index"); + u32 off_dw = index.U32(); + ASSERT(off_dw <= + static_cast(TessConstantAttribute::FirstEdgeTessFactorIndex)); + + auto tess_const_attr = static_cast(off_dw); + switch (tess_const_attr) { + case TessConstantAttribute::LsStride: + // If not, we may need to make this runtime state for TES + ASSERT(info.l_stage == LogicalStage::TessellationControl); + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride)); + break; + case TessConstantAttribute::HsCpStride: + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride)); + break; + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + case TessConstantAttribute::PatchConstBase: + walker.MarkTessAttributeUsers(&inst, tess_const_attr); + // We should be able to safely set these to 0 so that indexing happens only + // within the local patch in the recompiled Vulkan shader. This assumes + // these values only contribute to address calculations for in/out + // attributes in the original gcn shader. + // See the explanation for why we set V2 to 0 when emitting the prologue. + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + break; + case Shader::TessConstantAttribute::PatchConstSize: + case Shader::TessConstantAttribute::PatchOutputSize: + case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold: + case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex: + // May need to replace PatchConstSize and PatchOutputSize with 0 + break; + default: + UNREACHABLE_MSG("Read past end of TessConstantsBuffer"); + } + } + } + } + } + + // These pattern matching are neccessary for now unless we support dynamic indexing of + // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single + // vec4 array like in/out attrs. Not sure about tess factors. + if (info.l_stage == LogicalStage::TessellationControl) { + // Replace the BFEs on V1 (packed with patch id within VGT and output cp id) + for (IR::Block* block : program.blocks) { + for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) { + IR::Inst& inst = *it; + if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(0), MatchU32(8)) + .Match(IR::Value{&inst})) { + IR::IREmitter emit(*block, it); + // This is the patch id within the VGT, not the actual PrimitiveId + // in the draw + IR::Value replacement(0u); + inst.ReplaceUsesWithAndRemove(replacement); + } else if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32( + MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(8), MatchU32(5)) + .Match(IR::Value{&inst})) { + IR::IREmitter ir(*block, it); + IR::Value replacement; + if (runtime_info.hs_info.IsPassthrough()) { + // Deal with annoying pattern in BB where InvocationID use makes no + // sense (in addr calculation for patchconst or tess factor write) + replacement = ir.Imm32(0); + } else { + replacement = ir.GetAttributeU32(IR::Attribute::InvocationId); + } + inst.ReplaceUsesWithAndRemove(replacement); + } + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7bd47992c..8a71d9e1f 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -6,6 +6,10 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +namespace Shader { +struct Profile; +} + namespace Shader::Optimization { void SsaRewritePass(IR::BlockList& program); @@ -18,5 +22,9 @@ void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage); +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 89c5c78a0..a59398952 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) { } } +IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { + boost::container::static_vector comps; + for (u32 i = 0; i < 4; i++) { + switch (sharp.GetSwizzle(i)) { + case AmdGpu::CompSwizzle::Zero: + comps.emplace_back(ir.Imm32(0.f)); + break; + case AmdGpu::CompSwizzle::One: + comps.emplace_back(ir.Imm32(1.f)); + break; + case AmdGpu::CompSwizzle::Red: + comps.emplace_back(ir.CompositeExtract(texel, 0)); + break; + case AmdGpu::CompSwizzle::Green: + comps.emplace_back(ir.CompositeExtract(texel, 1)); + break; + case AmdGpu::CompSwizzle::Blue: + comps.emplace_back(ir.CompositeExtract(texel, 2)); + break; + case AmdGpu::CompSwizzle::Alpha: + comps.emplace_back(ir.CompositeExtract(texel, 3)); + break; + default: + UNREACHABLE(); + } + } + return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); +}; + class Descriptors { public: explicit Descriptors(Info& info_) @@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + + // Apply dst_sel swizzle on formatted buffer instructions + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); + } else { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); + } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors, const IR::Inst* producer, const u32 image_binding, const AmdGpu::Image& image) { // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - const u32 sampler_binding = [&] { + const auto [sampler_binding, sampler] = [&] -> std::pair { ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); const IR::Value& handle = producer->Arg(1); // Inline sampler resource. if (handle.IsImmediate()) { LOG_WARNING(Render_Vulkan, "Inline sampler detected"); - return descriptors.Add(SamplerResource{ + const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}; + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = std::numeric_limits::max(), - .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + .inline_sampler = inline_sampler, }); + return {binding, inline_sampler}; } // Normal sampler resource. const auto ssharp_handle = handle.InstRecursive(); const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); const auto ssharp = TrackSharp(ssharp_ud, info); - return descriptors.Add(SamplerResource{ + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = ssharp, .associated_image = image_binding, .disable_aniso = disable_aniso, }); + return {binding, info.ReadUdSharp(ssharp)}; }(); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -539,28 +580,47 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } }(); + const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized; + // Query dimensions of image if needed for normalization. + // We can't use the image sharp because it could be bound to a different image later. + const auto dimensions = + unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) + : IR::Value{}; + const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { + const auto coord = get_addr_reg(coord_idx); + if (unnormalized) { + // Normalize the coordinate for sampling, dividing by its corresponding dimension. + const auto dim = + ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)}); + return ir.FPDiv(coord, dim); + } + return coord; + }; + // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler const IR::Value coords = [&] -> IR::Value { switch (image.GetType()) { case AmdGpu::ImageType::Color1D: // x addr_reg = addr_reg + 1; - return get_addr_reg(addr_reg - 1); + return get_coord(addr_reg - 1, 0); case AmdGpu::ImageType::Color1DArray: // x, slice [[fallthrough]]; case AmdGpu::ImageType::Color2D: // x, y addr_reg = addr_reg + 2; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)); + return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1)); case AmdGpu::ImageType::Color2DArray: // x, y, slice [[fallthrough]]; case AmdGpu::ImageType::Color2DMsaa: // x, y, frag - [[fallthrough]]; + addr_reg = addr_reg + 3; + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), + get_addr_reg(addr_reg - 1)); case AmdGpu::ImageType::Color3D: // x, y, z addr_reg = addr_reg + 3; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), - get_addr_reg(addr_reg - 1)); + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), + get_coord(addr_reg - 1, 2)); case AmdGpu::ImageType::Cube: // x, y, face addr_reg = addr_reg + 3; - return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_addr_reg(addr_reg - 1), false, inst_info.is_array); default: UNREACHABLE(); @@ -711,11 +771,17 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(3, SwizzleVector(ir, image, inst.Arg(3))); + } + if (inst_info.has_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || + inst.GetOpcode() == IR::Opcode::ImageRead || + inst.GetOpcode() == IR::Opcode::ImageWrite); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); - inst.SetArg(3, arg); + inst.SetArg(2, arg); } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { inst.SetArg(4, arg); diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index eb1be2967..d6f1efb12 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Optimization { @@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }; switch (stage) { + case Stage::Local: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + + u32 offset = 0; + const auto* addr = inst.Arg(0).InstRecursive(); + if (addr->GetOpcode() == IR::Opcode::IAdd32) { + ASSERT(addr->Arg(1).IsImmediate()); + offset = addr->Arg(1).U32(); + } + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { + const auto attrib = IR::Attribute::Param0 + (offset / 16); + const auto comp = (offset / 4) % 4; + const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data}; + ir.SetAttribute(attrib, ir.BitCast(value), comp); + offset += 4; + } + inst.Invalidate(); + break; + } + default: + break; + } + }); + break; + } case Stage::Export: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::LoadBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } const auto shl_inst = inst.Arg(1).TryInstRecursive(); - const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; + const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2); const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); const auto bucket = offset.Resolve().U32() / 256u; const auto attrib = bucket < 4 ? IR::Attribute::Position0 @@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8b93d72e3..c34b59b88 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::GetUserData: info.ud_mask.Set(inst.Arg(0).ScalarReg()); break; + case IR::Opcode::SetPatch: { + const auto patch = inst.Arg(0).Patch(); + if (patch <= IR::Patch::TessellationLodBottom) { + info.stores_tess_level_outer = true; + } else if (patch <= IR::Patch::TessellationLodInteriorV) { + info.stores_tess_level_inner = true; + } else { + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + } + break; + } + case IR::Opcode::GetPatch: { + const auto patch = inst.Arg(0).Patch(); + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + break; + } case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp new file mode 100644 index 000000000..aad8fb148 --- /dev/null +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Optimization { + +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { + if (!program.info.uses_shared || !profile.needs_lds_barriers) { + return; + } + using Type = IR::AbstractSyntaxNode::Type; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + if (node.type == Type::EndIf) { + --branch_depth; + continue; + } + if (node.type != Type::If) { + continue; + } + u32 curr_depth = branch_depth++; + if (curr_depth != 0) { + continue; + } + const IR::U1 cond = node.data.if_node.cond; + const auto insert_barrier = + IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = node.data.if_node.merge; + auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/patch.cpp b/src/shader_recompiler/ir/patch.cpp new file mode 100644 index 000000000..2485bc5b4 --- /dev/null +++ b/src/shader_recompiler/ir/patch.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/patch.h" + +namespace Shader::IR { + +std::string NameOf(Patch patch) { + switch (patch) { + case Patch::TessellationLodLeft: + return "TessellationLodLeft"; + case Patch::TessellationLodTop: + return "TessellationLodTop"; + case Patch::TessellationLodRight: + return "TessellationLodRight"; + case Patch::TessellationLodBottom: + return "TessellationLodBottom"; + case Patch::TessellationLodInteriorU: + return "TessellationLodInteriorU"; + case Patch::TessellationLodInteriorV: + return "TessellationLodInteriorV"; + default: + const u32 index = u32(patch) - u32(Patch::Component0); + return fmt::format("Component{}", index); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/patch.h b/src/shader_recompiler/ir/patch.h new file mode 100644 index 000000000..65d2192e6 --- /dev/null +++ b/src/shader_recompiler/ir/patch.h @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 125); + +constexpr bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +constexpr Patch PatchFactor(u32 index) { + return static_cast(index); +} + +constexpr Patch PatchGeneric(u32 index) { + return static_cast(static_cast(Patch::Component0) + index); +} + +constexpr u32 GenericPatchIndex(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +constexpr u32 GenericPatchElement(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +[[nodiscard]] std::string NameOf(Patch patch); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Patch patch, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch)); + } +}; diff --git a/src/shader_recompiler/ir/pattern_matching.h b/src/shader_recompiler/ir/pattern_matching.h new file mode 100644 index 000000000..1279f14c3 --- /dev/null +++ b/src/shader_recompiler/ir/pattern_matching.h @@ -0,0 +1,127 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::Optimiation::PatternMatching { + +// Attempt at pattern matching for Insts and Values +// Needs improvement, mostly a convenience + +template +struct MatchObject { + inline bool Match(IR::Value v) { + return static_cast(this)->Match(v); + } +}; + +struct MatchValue : MatchObject { + MatchValue(IR::Value& return_val_) : return_val(return_val_) {} + + inline bool Match(IR::Value v) { + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchIgnore : MatchObject { + MatchIgnore() {} + + inline bool Match(IR::Value v) { + return true; + } +}; + +struct MatchImm : MatchObject { + MatchImm(IR::Value& v) : return_val(v) {} + + inline bool Match(IR::Value v) { + if (!v.IsImmediate()) { + return false; + } + + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchAttribute : MatchObject { + MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {} + + inline bool Match(IR::Value v) { + return v.Type() == IR::Type::Attribute && v.Attribute() == attribute; + } + +private: + IR::Attribute attribute; +}; + +struct MatchU32 : MatchObject { + MatchU32(u32 imm_) : imm(imm_) {} + + inline bool Match(IR::Value v) { + return v.IsImmediate() && v.Type() == IR::Type::U32 && v.U32() == imm; + } + +private: + u32 imm; +}; + +template +struct MatchInstObject : MatchObject> { + static_assert(sizeof...(Args) == IR::NumArgsOf(opcode)); + MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {} + + inline bool Match(IR::Value v) { + IR::Inst* inst = v.TryInstRecursive(); + if (!inst || inst->GetOpcode() != opcode) { + return false; + } + + bool matched = true; + + [&](std::index_sequence) { + ((matched = matched && std::get(pattern).Match(inst->Arg(Is))), ...); + }(std::make_index_sequence{}); + + return matched; + } + +private: + using MatchArgs = std::tuple; + MatchArgs pattern; +}; + +template +inline auto MakeInstPattern(Args&&... args) { + return MatchInstObject(std::forward(args)...); +} + +// Conveniences. TODO probably simpler way of doing this +#define M_READCONST(...) MakeInstPattern(__VA_ARGS__) +#define M_GETUSERDATA(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDUEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDSEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_GETATTRIBUTEU32(...) MakeInstPattern(__VA_ARGS__) +#define M_UMOD32(...) MakeInstPattern(__VA_ARGS__) +#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern(__VA_ARGS__) +#define M_IADD32(...) MakeInstPattern(__VA_ARGS__) +#define M_IMUL32(...) MakeInstPattern(__VA_ARGS__) +#define M_BITWISEAND32(...) MakeInstPattern(__VA_ARGS__) +#define M_GETTESSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_SETTCSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X2(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X4(...) \ + MakeInstPattern(__VA_ARGS__) + +} // namespace Shader::Optimiation::PatternMatching \ No newline at end of file diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 3004d2b86..19e0da3dd 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -40,7 +40,8 @@ union TextureInstInfo { BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; BitField<9, 1, u32> is_array; - BitField<10, 1, u32> is_gather; + BitField<10, 1, u32> is_unnormalized; + BitField<11, 1, u32> is_gather; }; union BufferInstInfo { @@ -48,7 +49,8 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; - BitField<14, 1, u32> ring_access; // global + system coherency + BitField<14, 1, u32> system_coherent; + BitField<15, 1, u32> globally_coherent; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h index ec855a77e..0f043fb64 100644 --- a/src/shader_recompiler/ir/type.h +++ b/src/shader_recompiler/ir/type.h @@ -15,7 +15,7 @@ enum class Type { ScalarReg = 1 << 1, VectorReg = 1 << 2, Attribute = 1 << 3, - SystemValue = 1 << 4, + Patch = 1 << 4, U1 = 1 << 5, U8 = 1 << 6, U16 = 1 << 7, diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index 889e99556..8826b80f2 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index dbe8b5cc4..ed1e5536a 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -16,6 +16,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -34,6 +35,7 @@ public: explicit Value(IR::ScalarReg reg) noexcept; explicit Value(IR::VectorReg reg) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch patch) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -56,6 +58,7 @@ public: [[nodiscard]] IR::ScalarReg ScalarReg() const; [[nodiscard]] IR::VectorReg VectorReg() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -75,6 +78,7 @@ private: IR::ScalarReg sreg; IR::VectorReg vreg; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; @@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const { return attribute; } +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + inline bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 96c458d44..fc8c5956e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -23,9 +23,11 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; + bool supports_image_load_store_lod{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; + bool needs_lds_barriers{}; u64 min_ssbo_alignment{}; }; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 64f842c42..bb027a11e 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile) { + RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] != token_mov_vcchi) { @@ -60,17 +63,35 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + const auto stage = program.info.stage; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + Shader::Optimization::IdentityRemovalPass(program.blocks); + if (info.l_stage == LogicalStage::TessellationControl) { + // Tess passes require previous const prop passes for now (for simplicity). TODO allow + // fine grained folding or opportunistic folding we set an operand to an immediate + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::HullShaderTransform(program, runtime_info); + } else if (info.l_stage == LogicalStage::TessellationEval) { + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::DomainShaderTransform(program, runtime_info); + } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - if (program.info.stage != Stage::Compute) { + Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } - Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); + Shader::Optimization::SharedMemoryBarrierPass(program, profile); return program; } diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index f8acf6c9e..8180c29b3 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,6 @@ struct Pools { }; [[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile); + RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4c779a368..23e23c118 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -7,6 +7,7 @@ #include #include #include "common/types.h" +#include "shader_recompiler/frontend/tessellation.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/types.h" @@ -21,12 +22,31 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 7; + +// Vertex intentionally comes after TCS/TES due to order of compilation +enum class LogicalStage : u32 { + Fragment, + TessellationControl, + TessellationEval, + Vertex, + Geometry, + Compute, + NumLogicalStages +}; + +constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } +struct LocalRuntimeInfo { + u32 ls_stride; + bool links_with_tcs; + + auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; +}; + struct ExportRuntimeInfo { u32 vertex_data_size; @@ -64,9 +84,57 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + // Domain + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; + u32 hs_output_cp_stride{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one; + return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + tess_type == other.tess_type && tess_topology == other.tess_topology && + tess_partitioning == other.tess_partitioning && + hs_output_cp_stride == other.hs_output_cp_stride; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + hs_output_cp_stride = tess_constants.hs_cp_stride; + } +}; + +struct HullRuntimeInfo { + // from registers + u32 num_input_control_points; + u32 num_threads; + AmdGpu::TessellationType tess_type; + + // from tess constants buffer + u32 ls_stride; + u32 hs_output_cp_stride; + u32 hs_output_base; + + auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + + // It might be possible for a non-passthrough TCS to have these conditions, in some + // dumb situation. + // In that case, it should be fine to assume passthrough and declare some extra + // output control points and attributes that shouldnt be read by the TES anyways + bool IsPassthrough() const { + return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; + }; + + // regs.ls_hs_config.hs_output_control_points contains the number of threads, which + // isn't exactly the number of output control points. + // For passthrough shaders, the register field is set to 1, so use the number of + // input control points + u32 NumOutputControlPoints() const { + return IsPassthrough() ? num_input_control_points : num_threads; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + ls_stride = tess_constants.ls_stride; + hs_output_cp_stride = tess_constants.hs_cp_stride; + hs_output_base = tess_constants.hs_output_base; } }; @@ -150,8 +218,10 @@ struct RuntimeInfo { AmdGpu::FpDenormMode fp_denorm_mode32; AmdGpu::FpRoundMode fp_round_mode32; union { + LocalRuntimeInfo ls_info; ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + HullRuntimeInfo hs_info; GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -174,6 +244,10 @@ struct RuntimeInfo { return es_info == other.es_info; case Stage::Geometry: return gs_info == other.gs_info; + case Stage::Hull: + return hs_info == other.hs_info; + case Stage::Local: + return ls_info == other.ls_info; default: return true; } diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 2a3bd62f4..5799c4c95 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,6 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; + u32 dst_select = 0; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -38,8 +39,12 @@ struct TextureBufferSpecialization { struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; + u32 dst_select = 0; - auto operator<=>(const ImageSpecialization&) const = default; + bool operator==(const ImageSpecialization& other) const { + return type == other.type && is_integer == other.is_integer && + (dst_select != 0 ? dst_select == other.dst_select : true); + } }; struct FMaskSpecialization { @@ -49,6 +54,12 @@ struct FMaskSpecialization { auto operator<=>(const FMaskSpecialization&) const = default; }; +struct SamplerSpecialization { + bool force_unnormalized = false; + + auto operator<=>(const SamplerSpecialization&) const = default; +}; + /** * Alongside runtime information, this structure also checks bound resources * for compatibility. Can be used as a key for storing shader permutations. @@ -67,6 +78,7 @@ struct StageSpecialization { boost::container::small_vector tex_buffers; boost::container::small_vector images; boost::container::small_vector fmasks; + boost::container::small_vector samplers; Backend::Bindings start{}; explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, @@ -96,17 +108,37 @@ struct StageSpecialization { ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + spec.dst_select = sharp.DstSelect(); }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.type = sharp.GetBoundType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + if (desc.is_storage) { + spec.dst_select = sharp.DstSelect(); + } }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.width = sharp.width; spec.height = sharp.height; }); + ForEachSharp(samplers, info->samplers, + [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { + spec.force_unnormalized = sharp.force_unnormalized; + }); + + // Initialize runtime_info fields that rely on analysis in tessellation passes + if (info->l_stage == LogicalStage::TessellationControl || + info->l_stage == LogicalStage::TessellationEval) { + Shader::TessellationDataConstantBuffer tess_constants; + info->ReadTessConstantBuffer(tess_constants); + if (info->l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + } } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { @@ -175,6 +207,11 @@ struct StageSpecialization { return false; } } + for (u32 i = 0; i < samplers.size(); i++) { + if (samplers[i] != other.samplers[i]) { + return false; + } + } return true; } }; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8db2d63c4..5dd3edd6d 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/assert.h" #include "common/config.h" #include "common/debug.h" @@ -18,7 +20,32 @@ namespace AmdGpu { static const char* dcb_task_name{"DCB_TASK"}; static const char* ccb_task_name{"CCB_TASK"}; -static const char* acb_task_name{"ACB_TASK"}; + +#define MAX_NAMES 56 +static_assert(Liverpool::NumComputeRings <= MAX_NAMES); + +#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n), +#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)} + +static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES); + +#define YIELD(name) \ + FIBER_EXIT; \ + co_yield {}; \ + FIBER_ENTER(name); + +#define YIELD_CE() YIELD(ccb_task_name) +#define YIELD_GFX() YIELD(dcb_task_name) +#define YIELD_ASC(id) YIELD(acb_task_name[id]) + +#define RESUME(task, name) \ + FIBER_EXIT; \ + task.handle.resume(); \ + FIBER_ENTER(name); + +#define RESUME_CE(task) RESUME(task, ccb_task_name) +#define RESUME_GFX(task) RESUME(task, dcb_task_name) +#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id]) std::array Liverpool::ConstantEngine::constants_heap; @@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) { VideoCore::StartCapture(); - int qid = -1; + curr_qid = -1; while (num_submits || num_commands) { @@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) { --num_commands; } - qid = (qid + 1) % NumTotalQueues; + curr_qid = (curr_qid + 1) % num_mapped_queues; - auto& queue = mapped_queues[qid]; + auto& queue = mapped_queues[curr_qid]; Task::Handle task{}; { @@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) { } Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { - TracyFiberEnter(ccb_task_name); + FIBER_ENTER(ccb_task_name); while (!ccb.empty()) { const auto* header = reinterpret_cast(ccb.data()); @@ -155,9 +182,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { case PM4ItOpcode::WaitOnDeCounterDiff: { const auto diff = it_body[0]; while ((cblock.de_count - cblock.ce_count) >= diff) { - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); + YIELD_CE(); } break; } @@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { const auto* indirect_buffer = reinterpret_cast(header); auto task = ProcessCeUpdate({indirect_buffer->Address(), indirect_buffer->ib_size}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_CE(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); - }; + while (!task.handle.done()) { + YIELD_CE(); + RESUME_CE(task); + } break; } default: @@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { ccb = NextPacket(ccb, header->type3.NumWords() + 1); } - TracyFiberLeave; + FIBER_EXIT; } Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span ccb) { - TracyFiberEnter(dcb_task_name); + FIBER_ENTER(dcb_task_name); cblock.Reset(); @@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(dcb.data()); @@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[GfxQueueId].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::SetUconfigReg: { @@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - regs.cs_program.dim_x = dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); rasterizer->DispatchDirect(); @@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + auto& cs_program = GetCsRegs(); const auto offset = dispatch_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DispatchIndirect", cmd_address)); @@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); while (!rewind->Valid()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -629,15 +659,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); - if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + if (vo_port->IsVoLabel(wait_addr) && + num_submits == mapped_queues[GfxQueueId].submits.size()) { vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -645,13 +672,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); auto task = ProcessGraphics( {indirect_buffer->Address(), indirect_buffer->ib_size}, {}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_GFX(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - }; + while (!task.handle.done()) { + YIELD_GFX(); + RESUME_GFX(task); + } break; } case PM4ItOpcode::IncrementDeCounter: { @@ -660,9 +686,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span acb, int vqid) { - TracyFiberEnter(acb_task_name); +template +Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { + FIBER_ENTER(acb_task_name[vqid]); + const auto& queue = asc_queues[{vqid}]; auto base_addr = reinterpret_cast(acb.data()); while (!acb.empty()) { @@ -711,15 +737,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = ProcessCompute( + auto task = ProcessCompute( {indirect_buffer->Address(), indirect_buffer->ib_size}, vqid); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_ASC(task, vqid); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - }; + while (!task.handle.done()) { + YIELD_ASC(vqid); + RESUME_ASC(task, vqid); + } break; } case PM4ItOpcode::DmaData: { @@ -757,30 +782,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { case PM4ItOpcode::Rewind: { const PM4CmdRewind* rewind = reinterpret_cast(header); while (!rewind->Valid()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::SetShReg: { const auto* set_data = reinterpret_cast(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[vqid + 1].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); - regs.cs_program.dim_x = dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->DispatchDirect(); @@ -803,17 +836,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::ReleaseMem: { const auto* release_mem = reinterpret_cast(header); - release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <--- + release_mem->SignalFence(static_cast(queue.pipe_id)); break; } default: @@ -821,10 +850,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { static_cast(opcode), count); } - acb = NextPacket(acb, header->type3.NumWords() + 1); + const auto packet_size_dw = header->type3.NumWords() + 1; + acb = NextPacket(acb, packet_size_dw); + + if constexpr (!is_indirect) { + *queue.read_addr += packet_size_dw; + *queue.read_addr %= queue.ring_size_dw; + } } - TracyFiberLeave; + FIBER_EXIT; } std::pair, std::span> Liverpool::CopyCmdBuffers( @@ -881,10 +916,11 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { submit_cv.notify_one(); } -void Liverpool::SubmitAsc(u32 vqid, std::span acb) { - ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); - auto& queue = mapped_queues[vqid]; +void Liverpool::SubmitAsc(u32 gnm_vqid, std::span acb) { + ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index"); + auto& queue = mapped_queues[gnm_vqid]; + const auto vqid = gnm_vqid - 1; const auto& task = ProcessCompute(acb, vqid); { std::scoped_lock lock{queue.m_access}; @@ -892,6 +928,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { } std::scoped_lock lk{submit_mutex}; + num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1); ++num_submits; submit_cv.notify_one(); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ca3b01612..4c74d37d0 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -16,6 +16,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" +#include "common/slot_vector.h" #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" @@ -45,7 +46,8 @@ struct Liverpool { static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; - static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe); + static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe; + static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings; static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs static constexpr u32 NumColorBuffers = 8; @@ -143,6 +145,13 @@ struct Liverpool { } }; + struct HsTessFactorClamp { + // I've only seen min=0.0, max=1.0 so far. + // TODO why is max set to 1.0? Makes no sense + float hs_max_tess; + float hs_min_tess; + }; + struct ComputeProgram { u32 dispatch_initiator; u32 dim_x; @@ -431,6 +440,10 @@ struct Liverpool { return u64(z_read_base) << 8; } + u64 StencilAddress() const { + return u64(stencil_read_base) << 8; + } + u32 NumSamples() const { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } @@ -952,6 +965,7 @@ struct Liverpool { enum VgtStages : u32 { Vs = 0u, // always enabled EsGs = 0xB0u, + LsHs = 0x45u, }; VgtStages raw; @@ -959,7 +973,8 @@ struct Liverpool { BitField<2, 1, u32> hs_en; BitField<3, 2, u32> es_en; BitField<5, 1, u32> gs_en; - BitField<6, 1, u32> vs_en; + BitField<6, 2, u32> vs_en; + BitField<8, 1, u32> dynamic_hs; bool IsStageEnabled(u32 stage) const { switch (stage) { @@ -1055,6 +1070,28 @@ struct Liverpool { }; }; + union LsHsConfig { + u32 raw; + BitField<0, 8, u32> num_patches; + BitField<8, 6, u32> hs_input_control_points; + BitField<14, 6, u32> hs_output_control_points; + }; + + union TessellationConfig { + u32 raw; + BitField<0, 2, TessellationType> type; + BitField<2, 3, TessellationPartitioning> partitioning; + BitField<5, 3, TessellationTopology> topology; + }; + + union TessFactorMemoryBase { + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1105,10 +1142,10 @@ struct Liverpool { ShaderProgram es_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram hs_program; - INSERT_PADDING_WORDS(0x2C); + INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); - ComputeProgram cs_program; + ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues` INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; INSERT_PADDING_WORDS(1); @@ -1172,7 +1209,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + HsTessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; INSERT_PADDING_WORDS(1); ModeControl mode_control; @@ -1196,9 +1235,10 @@ struct Liverpool { BitField<0, 11, u32> vgt_gs_max_vert_out; INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(1); + LsHsConfig ls_hs_config; u32 vgt_gs_vert_itemsize[4]; - INSERT_PADDING_WORDS(4); + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); PolygonOffset poly_offset; GsInstances vgt_gs_instance_cnt; StreamOutConfig vgt_strmout_config; @@ -1212,6 +1252,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1258,7 +1300,7 @@ public: ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb); - void SubmitAsc(u32 vqid, std::span acb); + void SubmitAsc(u32 gnm_vqid, std::span acb); void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; @@ -1301,6 +1343,18 @@ public: gfx_queue.dcb_buffer.reserve(GfxReservedSize); } + inline ComputeProgram& GetCsRegs() { + return mapped_queues[curr_qid].cs_state; + } + + struct AscQueueInfo { + VAddr map_addr; + u32* read_addr; + u32 ring_size_dw; + u32 pipe_id; + }; + Common::SlotVector asc_queues{}; + private: struct Task { struct promise_type { @@ -1338,7 +1392,8 @@ private: std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); - Task ProcessCompute(std::span acb, int vqid); + template + Task ProcessCompute(std::span acb, u32 vqid); void Process(std::stop_token stoken); @@ -1353,6 +1408,7 @@ private: VAddr indirect_args_addr{}; }; std::array mapped_queues{}; + u32 num_mapped_queues{1u}; // GFX is always available struct ConstantEngine { void Reset() { @@ -1381,6 +1437,7 @@ private: std::mutex submit_mutex; std::condition_variable_any submit_cv; std::queue> command_queue{}; + int curr_qid{-1}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); @@ -1427,6 +1484,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); @@ -1441,6 +1499,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); @@ -1452,6 +1511,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); +static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ba87425f2..5d7417559 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -52,6 +52,10 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } + u32 DstSelect() const { + return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); + } + CompSwizzle GetSwizzle(u32 comp) const noexcept { const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; return static_cast(select[comp]); @@ -204,6 +208,11 @@ struct Image { return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); } + CompSwizzle GetSwizzle(u32 comp) const noexcept { + const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; + return static_cast(select[comp]); + } + static char SelectComp(u32 sel) { switch (sel) { case 0: diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 6b95ed910..fa8491665 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -21,6 +23,69 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -118,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index f0f7d352c..ec0bb3bb7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -65,6 +65,33 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) { } } +bool IsPrimitiveCulled(AmdGpu::PrimitiveType type) { + switch (type) { + case AmdGpu::PrimitiveType::TriangleList: + case AmdGpu::PrimitiveType::TriangleFan: + case AmdGpu::PrimitiveType::TriangleStrip: + case AmdGpu::PrimitiveType::PatchPrimitive: + case AmdGpu::PrimitiveType::AdjTriangleList: + case AmdGpu::PrimitiveType::AdjTriangleStrip: + case AmdGpu::PrimitiveType::QuadList: + case AmdGpu::PrimitiveType::QuadStrip: + case AmdGpu::PrimitiveType::Polygon: + return true; + case AmdGpu::PrimitiveType::None: + case AmdGpu::PrimitiveType::PointList: + case AmdGpu::PrimitiveType::LineList: + case AmdGpu::PrimitiveType::LineStrip: + case AmdGpu::PrimitiveType::AdjLineList: + case AmdGpu::PrimitiveType::AdjLineStrip: + case AmdGpu::PrimitiveType::RectList: // Screen-aligned rectangles that are not culled + case AmdGpu::PrimitiveType::LineLoop: + return false; + default: + UNREACHABLE(); + return true; + } +} + vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { switch (type) { case AmdGpu::PrimitiveType::PointList: @@ -672,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, default: break; } - } else if (comp_swap_reverse) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eA8B8G8R8UnormPack32; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eA8B8G8R8SrgbPack32; - default: - break; - } } return base_format; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 287ba691e..ebd09f0ee 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -18,6 +18,8 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op); vk::CompareOp CompareOp(Liverpool::CompareFunc func); +bool IsPrimitiveCulled(AmdGpu::PrimitiveType type); + vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type); vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 9178aeb65..5fe199e0e 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -3,10 +3,6 @@ #pragma once -#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER -#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 -#endif - // Include vulkan-hpp header #define VK_ENABLE_BETA_EXTENSIONS #define VK_NO_PROTOTYPES diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8d495ab06..a39b18378 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipelineKey compute_key_, const Shader::Info& info_, vk::ShaderModule module) : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { - auto& info = stages[int(Shader::Stage::Compute)]; + auto& info = stages[int(Shader::LogicalStage::Compute)]; info = &info_; const vk::PipelineShaderStageCreateInfo shader_ci = { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d53204c77..222ffb5a9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/scope_exit.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; if (fetch_shader && !instance.IsVertexInputDynamicState()) { - const auto& vs_info = GetStage(Shader::Stage::Vertex); + const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader @@ -106,11 +107,17 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than -1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + .patchControlPoints = key.patch_control_points, + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), - .cullMode = LiverpoolToVK::CullMode(key.cull_mode), + .cullMode = LiverpoolToVK::IsPrimitiveCulled(key.prim_type) + ? LiverpoolToVK::CullMode(key.cull_mode) + : vk::CullModeFlagBits::eNone, .frontFace = key.front_face == Liverpool::FrontFace::Clockwise ? vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise, @@ -202,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector shader_stages; - auto stage = u32(Shader::Stage::Vertex); + auto stage = u32(Shader::LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -210,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Geometry); + stage = u32(Shader::LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -218,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Fragment); + stage = u32(Shader::LogicalStage::TessellationControl); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationControl, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::TessellationEval); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, @@ -227,17 +250,15 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }); } - const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); - const u32 num_color_formats = std::distance(key.color_formats.begin(), it); const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { - .colorAttachmentCount = num_color_formats, + .colorAttachmentCount = key.num_color_attachments, .pColorAttachmentFormats = key.color_formats.data(), .depthAttachmentFormat = key.depth_format, .stencilAttachmentFormat = key.stencil_format, }; std::array attachments; - for (u32 i = 0; i < num_color_formats; i++) { + for (u32 i = 0; i < key.num_color_attachments; i++) { const auto& control = key.blend_controls[i]; const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor); const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor); @@ -290,7 +311,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul const vk::PipelineColorBlendStateCreateInfo color_blending = { .logicOpEnable = false, .logicOp = vk::LogicOp::eCopy, - .attachmentCount = num_color_formats, + .attachmentCount = key.num_color_attachments, .pAttachments = attachments.data(), .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, }; @@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, @@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() { if (!stage) { continue; } - if (stage->has_readconst) { bindings.push_back({ .binding = binding++, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 2834fceb7..444c8517e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -29,6 +29,7 @@ using Liverpool = AmdGpu::Liverpool; struct GraphicsPipelineKey { std::array stage_hashes; + u32 num_color_attachments; std::array color_formats; std::array color_num_formats; std::array mrt_swizzles; @@ -51,6 +52,7 @@ struct GraphicsPipelineKey { std::array blend_controls; std::array write_masks; std::array vertex_buffer_formats; + u32 patch_control_points; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; @@ -72,7 +74,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb60..790e76400 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/config.h" +#include "common/debug.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -68,11 +69,10 @@ std::unordered_map GetFormatProperties( } // Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility static constexpr std::array misc_formats = { - vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm, - vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint, - vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb, - vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint, + vk::Format::eA2R10G10B10UnormPack32, + vk::Format::eB8G8R8A8Unorm, + vk::Format::eB8G8R8A8Srgb, + vk::Format::eD24UnormS8Uint, }; for (const auto& format : misc_formats) { if (!format_properties.contains(format)) { @@ -262,11 +262,13 @@ bool Instance::CreateDevice() { // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool calibrated_timestamps = + TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); + image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. @@ -327,6 +329,7 @@ bool Instance::CreateDevice() { .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, + .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, @@ -580,42 +583,22 @@ bool Instance::IsFormatSupported(const vk::Format format, return (GetFormatFeatureFlags(format) & flags) == flags; } -static vk::Format GetAlternativeFormat(const vk::Format format) { - switch (format) { - case vk::Format::eB5G6R5UnormPack16: - return vk::Format::eR5G6B5UnormPack16; - case vk::Format::eD16UnormS8Uint: - return vk::Format::eD24UnormS8Uint; - default: - return format; - } -} - vk::Format Instance::GetSupportedFormat(const vk::Format format, const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return format; - } - const vk::Format alternative = GetAlternativeFormat(format); - if (IsFormatSupported(alternative, flags)) [[likely]] { - return alternative; + if (!IsFormatSupported(format, flags)) [[unlikely]] { + switch (format) { + case vk::Format::eD16UnormS8Uint: + if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) { + return vk::Format::eD24UnormS8Uint; + } + if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) { + return vk::Format::eD32SfloatS8Uint; + } + default: + break; + } } return format; } -vk::ComponentMapping Instance::GetSupportedComponentSwizzle( - const vk::Format format, const vk::ComponentMapping swizzle, - const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return swizzle; - } - - vk::ComponentMapping supported_swizzle = swizzle; - if (format == vk::Format::eB5G6R5UnormPack16) { - // B5G6R5 -> R5G6B5 - std::swap(supported_swizzle.r, supported_swizzle.b); - } - return supported_swizzle; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9cc..54a9b9873 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -33,10 +33,6 @@ public: [[nodiscard]] vk::Format GetSupportedFormat(vk::Format format, vk::FormatFeatureFlags2 flags) const; - /// Re-orders a component swizzle for format compatibility, if needed. - [[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle( - vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const; - /// Returns the Vulkan instance vk::Instance GetInstance() const { return *instance; @@ -158,6 +154,11 @@ public: return legacy_vertex_attributes; } + /// Returns true when VK_AMD_shader_image_load_store_lod is supported. + bool IsImageLoadStoreLodSupported() const { + return image_load_store_lod; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -327,6 +328,7 @@ private: bool maintenance5{}; bool list_restart{}; bool legacy_vertex_attributes{}; + bool image_load_store_lod{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b9f318f7a..4b88bd374 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -22,6 +22,8 @@ extern std::unique_ptr presenter; namespace Vulkan { +using Shader::LogicalStage; +using Shader::Stage; using Shader::VsOutput; constexpr static std::array DescriptorHeapSizes = { @@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); } -Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { +Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; const auto BuildCommon = [&](const auto& program) { @@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { info.fp_round_mode32 = program.settings.fp_round_mode32; }; switch (stage) { - case Shader::Stage::Export: { + case Stage::Local: { + BuildCommon(regs.ls_program); + if (regs.stage_enable.IsStageEnabled(static_cast(Stage::Hull))) { + info.ls_info.links_with_tcs = true; + Shader::TessellationDataConstantBuffer tess_constants; + const auto* pgm = regs.ProgramForStage(static_cast(Stage::Hull)); + const auto params = Liverpool::GetParams(*pgm); + const auto& hull_info = program_cache.at(params.hash)->info; + hull_info.ReadTessConstantBuffer(tess_constants); + info.ls_info.ls_stride = tess_constants.ls_stride; + } + break; + } + case Stage::Hull: { + BuildCommon(regs.hs_program); + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.tess_type = regs.tess_config.type; + + // We need to initialize most hs_info fields after finding the V# with tess constants + break; + } + case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } - case Shader::Stage::Vertex: { + case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + if (l_stage == LogicalStage::TessellationEval) { + info.vs_info.tess_type = regs.tess_config.type; + info.vs_info.tess_topology = regs.tess_config.topology; + info.vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } - case Shader::Stage::Geometry: { + case Stage::Geometry: { BuildCommon(regs.gs_program); auto& gs_info = info.gs_info; gs_info.output_vertices = regs.vgt_gs_max_vert_out; @@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } - case Shader::Stage::Fragment: { + case Stage::Fragment: { BuildCommon(regs.ps_program); info.fs_info.en_flags = regs.ps_input_ena; info.fs_info.addr_flags = regs.ps_input_addr; @@ -143,10 +172,10 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } break; } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; + case Stage::Compute: { + const auto& cs_pgm = liverpool->GetCsRegs(); info.num_user_data = cs_pgm.settings.num_user_regs; - info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; + info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4; info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), @@ -172,8 +201,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), + .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, + .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", @@ -268,6 +299,7 @@ bool PipelineCache::RefreshGraphicsKey() { // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. This might be not a case as HW color buffers can be bound in an arbitrary // order. We need to do some arrays compaction at this stage + key.num_color_attachments = 0; key.color_formats.fill(vk::Format::eUndefined); key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.blend_controls.fill({}); @@ -275,13 +307,26 @@ bool PipelineCache::RefreshGraphicsKey() { key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); key.vertex_buffer_formats.fill(vk::Format::eUndefined); + key.patch_control_points = 0; + if (regs.stage_enable.hs_en.Value()) { + key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + } + // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader // recompiler. - for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { + if (skip_cb_binding || !col_buf) { + // No attachment bound and no incremented index. continue; } + + const auto remapped_cb = key.num_color_attachments++; + if (!regs.color_target_mask.GetMask(cb)) { + // Bound to null handle, skip over this attachment index. + continue; + } + const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.color_formats[remapped_cb] = @@ -290,14 +335,12 @@ bool PipelineCache::RefreshGraphicsKey() { if (base_format == key.color_formats[remapped_cb]) { key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); } - - ++remapped_cb; } fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -324,23 +367,23 @@ bool PipelineCache::RefreshGraphicsKey() { auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, - key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, stage_out, params, binding); if (fetch_shader_) { fetch_shader = fetch_shader_; } return true; }; - const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; - const auto& IsGsFeaturesSupported = [&]() -> bool { // These checks are temporary until all functionality is implemented. return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; }; - TryBindStage(Shader::Stage::Fragment); + infos.fill(nullptr); + TryBindStage(Stage::Fragment, LogicalStage::Fragment); - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; switch (regs.stage_enable.raw) { @@ -348,22 +391,36 @@ bool PipelineCache::RefreshGraphicsKey() { if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { return false; } - if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { + if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { return false; } - if (!TryBindStage(Shader::Stage::Geometry)) { + if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + } + case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + if (!instance.IsTessellationSupported()) { + break; + } + if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { + return false; + } + if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { return false; } break; } default: { - TryBindStage(Shader::Stage::Vertex); - infos[static_cast(Shader::Stage::Geometry)] = nullptr; + TryBindStage(Stage::Vertex, LogicalStage::Vertex); break; } } - const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + const auto vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { @@ -385,10 +442,18 @@ bool PipelineCache::RefreshGraphicsKey() { // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) || - (key.mrt_mask & (1u << cb)) == 0) { - key.color_formats[cb] = vk::Format::eUndefined; - key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard; + if (skip_cb_binding || !col_buf) { + // No attachment bound and no incremented index. + continue; + } + + if (!regs.color_target_mask.GetMask(cb) || (key.mrt_mask & (1u << cb)) == 0) { + // Attachment is masked out by either color_target_mask or shader mrt_mask. In the case + // of the latter we need to change format to undefined, and either way we need to + // increment the index for the null attachment binding. + key.color_formats[remapped_cb] = vk::Format::eUndefined; + key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard; + ++remapped_cb; continue; } @@ -397,10 +462,9 @@ bool PipelineCache::RefreshGraphicsKey() { !col_buf.info.blend_bypass); key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + ++remapped_cb; num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2); - - ++remapped_cb; } // It seems that the number of samples > 1 set in the AA config doesn't mean we're always @@ -409,19 +473,18 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} // namespace Vulkan bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; - const auto* cs_pgm = &liverpool->regs.cs_program; - const auto cs_params = Liverpool::GetParams(*cs_pgm); + const auto& cs_pgm = liverpool->GetCsRegs(); + const auto cs_params = Liverpool::GetParams(cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = - GetProgram(Shader::Stage::Compute, cs_params, binding); + GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; } -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, - const Shader::RuntimeInfo& runtime_info, +vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, @@ -446,19 +509,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { - DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span{}, - is_patched); + DebugState.CollectShader(name, info.l_stage, module, spv, code, + patch ? *patch : std::span{}, is_patched); } return module; } -std::tuple, u64> -PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding) { - const auto runtime_info = BuildRuntimeInfo(stage); +PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, + Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { + auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - it_pgm.value() = std::make_unique(stage, params); + it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); @@ -467,6 +530,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); } + it_pgm.value()->info.user_data = params.user_data; auto& program = it_pgm.value(); auto& info = program->info; @@ -477,7 +541,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = Shader::Info(stage, params); + auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c5c2fc98e..ec4406448 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -34,11 +34,13 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; + using ModuleList = boost::container::small_vector; Shader::Info info; - boost::container::small_vector modules; + ModuleList modules; - explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) + : info{stage, l_stage, params} {} void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); @@ -55,10 +57,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple, - u64> - GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding); + using Result = std::tuple, u64>; + Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, + Shader::ShaderParams params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -71,10 +73,10 @@ private: std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding); - Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); + Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 8c48c83f7..1b13a1797 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -14,9 +14,10 @@ class BufferCache; namespace Vulkan { -static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment; +static constexpr auto gp_stage_flags = + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl | + vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; class Instance; class Scheduler; @@ -37,6 +38,7 @@ public: } auto GetStages() const { + static_assert(static_cast(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1); if (is_compute) { return std::span{stages.cend() - 1, stages.cend()}; } else { @@ -44,7 +46,7 @@ public: } } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept { return *stages[u32(stage)]; } diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 2e717397b..dbdabe0d9 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -14,6 +14,7 @@ #endif #include +#include #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" @@ -21,15 +22,6 @@ #include "sdl_window.h" #include "video_core/renderer_vulkan/vk_platform.h" -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL -static vk::detail::DynamicLoader dl; -#else -extern "C" { -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, - const char* pName); -} -#endif - namespace Vulkan { static const char* const VALIDATION_LAYER_NAME = "VK_LAYER_KHRONOS_validation"; @@ -44,6 +36,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( case 0xc81ad50e: case 0xb7c39078: case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE + case 0x1012616b: // `VK_FORMAT_UNDEFINED` does not match fragment shader output type return VK_FALSE; default: break; @@ -198,15 +191,57 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window return extensions; } +std::vector GetInstanceLayers(bool enable_validation, bool enable_crash_diagnostic) { + const auto [properties_result, properties] = vk::enumerateInstanceLayerProperties(); + if (properties_result != vk::Result::eSuccess || properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties: {}", + vk::to_string(properties_result)); + return {}; + } + + std::vector layers; + layers.reserve(2); + + if (enable_validation) { + layers.push_back(VALIDATION_LAYER_NAME); + } + if (enable_crash_diagnostic) { + layers.push_back(CRASH_DIAGNOSTIC_LAYER_NAME); + } + + // Sanitize layer list + std::erase_if(layers, [&](const char* layer) -> bool { + const auto it = std::ranges::find_if(properties, [layer](const auto& prop) { + return std::strcmp(layer, prop.layerName) == 0; + }); + if (it == properties.end()) { + LOG_ERROR(Render_Vulkan, "Requested layer {} is not available", layer); + return true; + } + return false; + }); + + return layers; +} + vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation, bool enable_crash_diagnostic) { LOG_INFO(Render_Vulkan, "Creating vulkan instance"); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - auto vkGetInstanceProcAddr = - dl.getProcAddress("vkGetInstanceProcAddr"); +#ifdef __APPLE__ + // If the Vulkan loader exists in /usr/local/lib, give it priority. The Vulkan SDK + // installs it here by default but it is not in the default library search path. + // The loader has a clause to check for it, but at a lower priority than the bundled + // libMoltenVK.dylib, so we need to handle it ourselves to give it priority. + static const std::string usr_local_path = "/usr/local/lib/libvulkan.dylib"; + static vk::detail::DynamicLoader dl = std::filesystem::exists(usr_local_path) + ? vk::detail::DynamicLoader(usr_local_path) + : vk::detail::DynamicLoader(); +#else + static vk::detail::DynamicLoader dl; #endif - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + VULKAN_HPP_DEFAULT_DISPATCHER.init( + dl.getProcAddress("vkGetInstanceProcAddr")); const auto [available_version_result, available_version] = VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion @@ -229,38 +264,25 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .apiVersion = available_version, }; - u32 num_layers = 0; - std::array layers; + const auto layers = GetInstanceLayers(enable_validation, enable_crash_diagnostic); - vk::Bool32 enable_force_barriers = vk::False; - const char* log_path{}; + const std::string extensions_string = fmt::format("{}", fmt::join(extensions, ", ")); + const std::string layers_string = fmt::format("{}", fmt::join(layers, ", ")); + LOG_INFO(Render_Vulkan, "Enabled instance extensions: {}", extensions_string); + LOG_INFO(Render_Vulkan, "Enabled instance layers: {}", layers_string); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - if (enable_validation) { - layers[num_layers++] = VALIDATION_LAYER_NAME; - } + // Validation settings + vk::Bool32 enable_sync = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + vk::Bool32 enable_gpuav = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = + Config::vkValidationGpuEnabled() ? "GPU_BASED_GPU_ASSISTED" : "GPU_BASED_NONE"; - if (enable_crash_diagnostic) { - layers[num_layers++] = CRASH_DIAGNOSTIC_LAYER_NAME; - static const auto crash_diagnostic_path = - Common::FS::GetUserPathString(Common::FS::PathType::LogDir); - log_path = crash_diagnostic_path.c_str(); - enable_force_barriers = vk::True; - } -#else - if (enable_validation || enable_crash_diagnostic) { - LOG_WARNING(Render_Vulkan, - "Skipping loading Vulkan layers as dynamic loading is not enabled."); - } -#endif + // Crash diagnostics settings + static const auto crash_diagnostic_path = + Common::FS::GetUserPathString(Common::FS::PathType::LogDir); + const char* log_path = crash_diagnostic_path.c_str(); + vk::Bool32 enable_force_barriers = vk::True; - vk::Bool32 enable_sync = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::Bool32 enable_gpuav = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() - ? "GPU_BASED_GPU_ASSISTED" - : "GPU_BASED_NONE"; const std::array layer_setings = { vk::LayerSettingEXT{ .pLayerName = VALIDATION_LAYER_NAME, @@ -330,7 +352,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e vk::StructureChain instance_ci_chain = { vk::InstanceCreateInfo{ .pApplicationInfo = &application_info, - .enabledLayerCount = num_layers, + .enabledLayerCount = static_cast(layers.size()), .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bfcdc9538..bd8906f86 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -48,10 +49,6 @@ void Rasterizer::CpSync() { bool Rasterizer::FilterDraw() { const auto& regs = liverpool->regs; - // Tessellation is unsupported so skip the draw to avoid locking up the driver. - if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { - return false; - } // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an // actual draw hence can skip pipeline creation. if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { @@ -100,6 +97,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { // an unnecessary transition and may result in state conflict if the resource is already // bound for reading. if ((mrt_mask & (1 << col_buf_id)) == 0) { + state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE; continue; } @@ -213,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -270,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -319,14 +317,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; } - const auto& cs = pipeline->GetStage(Shader::Stage::Compute); - if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { + const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); + if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) { return; } @@ -346,7 +344,7 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; @@ -386,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { const auto& regs = liverpool->regs; if (pipeline->IsCompute()) { - const auto& info = pipeline->GetStage(Shader::Stage::Compute); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); // Most of the time when a metadata is updated with a shader it gets cleared. It means // we can skip the whole dispatch and update the tracked state instead. Also, it is not @@ -615,18 +613,24 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc}); image_id = texture_cache.FindImage(desc); - auto& image = texture_cache.GetImage(image_id); - if (image.binding.is_bound) { + auto* image = &texture_cache.GetImage(image_id); + if (image->depth_id) { + // If this image has an associated depth image, it's a stencil attachment. + // Redirect the access to the actual depth-stencil buffer. + image_id = image->depth_id; + image = &texture_cache.GetImage(image_id); + } + if (image->binding.is_bound) { // The image is already bound. In case if it is about to be used as storage we need // to force general layout on it. - image.binding.force_general |= image_desc.is_storage; + image->binding.force_general |= image_desc.is_storage; } - if (image.binding.is_target) { + if (image->binding.is_target) { // The image is already bound as target. Since we read and output to it need to force // general layout too. - image.binding.force_general = 1u; + image->binding.force_general = 1u; } - image.binding.is_bound = 1u; + image->binding.is_bound = 1u; } // Second pass to re-bind images that were updated after binding diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index df9d40f07..ff78f5d24 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -2,17 +2,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_hle.h" -#include "vk_rasterizer.h" +extern std::unique_ptr liverpool; namespace Vulkan { static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; -bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { +static bool ExecuteCopyShaderHLE(const Shader::Info& info, + const AmdGpu::Liverpool::ComputeProgram& cs_program, + Rasterizer& rasterizer) { auto& scheduler = rasterizer.GetScheduler(); auto& buffer_cache = rasterizer.GetBufferCache(); @@ -34,9 +36,9 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static std::vector copies; copies.clear(); - copies.reserve(regs.cs_program.dim_x); + copies.reserve(cs_program.dim_x); - for (u32 i = 0; i < regs.cs_program.dim_x; i++) { + for (u32 i = 0; i < cs_program.dim_x; i++) { const auto& [dst_idx, src_idx, end] = ctl_buf[i]; const u32 local_dst_offset = dst_idx * buf_stride; const u32 local_src_offset = src_idx * buf_stride; @@ -60,7 +62,7 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; u32 batch_start = 0; - u32 batch_end = 1; + u32 batch_end = 0; while (batch_end < copies.size()) { // Place first copy into the current batch @@ -70,19 +72,19 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg auto dst_offset_min = copy.dstOffset; auto dst_offset_max = copy.dstOffset + copy.size; - for (int i = batch_start + 1; i < copies.size(); i++) { + for (++batch_end; batch_end < copies.size(); batch_end++) { // Compute new src and dst bounds if we were to batch this copy - const auto [src_offset, dst_offset, size] = copies[i]; + const auto& [src_offset, dst_offset, size] = copies[batch_end]; auto new_src_offset_min = std::min(src_offset_min, src_offset); auto new_src_offset_max = std::max(src_offset_max, src_offset + size); if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { - continue; + break; } auto new_dst_offset_min = std::min(dst_offset_min, dst_offset); auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size); if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) { - continue; + break; } // We can batch this copy @@ -90,10 +92,6 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg src_offset_max = new_src_offset_max; dst_offset_min = new_dst_offset_min; dst_offset_max = new_dst_offset_max; - if (i != batch_end) { - std::swap(copies[i], copies[batch_end]); - } - ++batch_end; } // Obtain buffers for the total source and destination ranges. @@ -116,7 +114,6 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg src_offset_max - src_offset_min, dst_offset_max - dst_offset_min); scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies); batch_start = batch_end; - ++batch_end; } scheduler.CommandBuffer().pipelineBarrier( @@ -127,10 +124,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg } bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { switch (info.pgm_hash) { case COPY_SHADER_HASH: - return ExecuteCopyShaderHLE(info, regs, rasterizer); + return ExecuteCopyShaderHLE(info, cs_program, rasterizer); default: return false; } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h index fda9b1735..008de8003 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.h +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -15,6 +15,6 @@ class Rasterizer; /// Attempts to execute a shader using HLE if possible. bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer); + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); } // namespace Vulkan diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e7e1ce1da..03339d280 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -145,8 +145,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const ImageInfo& info_) : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()} { + if (info.pixel_format == vk::Format::eUndefined) { + return; + } mip_hashes.resize(info.resources.levels); - ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index a1b1b007f..473dd731e 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -92,6 +92,10 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } + void AssociateDepth(ImageId image_id) { + depth_id = image_id; + } + boost::container::small_vector GetBarriers( vk::ImageLayout dst_layout, vk::Flags dst_mask, vk::PipelineStageFlags2 dst_stage, std::optional subres_range); @@ -116,6 +120,7 @@ struct Image { VAddr track_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; + ImageId depth_id{}; // Resource state tracking struct { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 0ed36ee39..1445d41cd 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -298,6 +298,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; + stencil_addr = buffer.StencilAddress(); + stencil_size = pitch * size.height * sizeof(u8); + guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size_bytes = depth_slice_sz * num_slices; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index e12ae3be1..a657310a8 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -69,7 +69,7 @@ struct ImageInfo { } props{}; // Surface properties with impact on various calculation factors vk::Format pixel_format = vk::Format::eUndefined; - vk::ImageType type = vk::ImageType::e1D; + vk::ImageType type = vk::ImageType::e2D; SubresourceExtent resources; Extent3D size{1, 1, 1}; u32 num_bits{}; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 61f1aaafe..ec1fda0d8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -50,34 +50,6 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -bool IsIdentityMapping(u32 dst_sel, u32 num_components) { - return (num_components == 1 && dst_sel == 0b001'000'000'100) || - (num_components == 2 && dst_sel == 0b001'000'101'100) || - (num_components == 3 && dst_sel == 0b001'110'101'100) || - (num_components == 4 && dst_sel == 0b111'110'101'100); -} - -vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { - // BGRA - if (dst_sel == 0b111100101110) { - switch (format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eR8G8B8A8Snorm: - return vk::Format::eB8G8R8A8Snorm; - case vk::Format::eR8G8B8A8Uint: - return vk::Format::eB8G8R8A8Uint; - case vk::Format::eR8G8B8A8Sint: - return vk::Format::eB8G8R8A8Sint; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - default: - break; - } - } - return format; -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); @@ -120,17 +92,6 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso mapping.b = ConvertComponentSwizzle(image.dst_sel_z); mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } - // Check for unfortunate case of storage images being swizzled - const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt()); - const u32 dst_sel = image.DstSelect(); - if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) { - if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) { - format = new_format; - return; - } - LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps, - image.DstSelectName()); - } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { @@ -170,7 +131,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } - if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) { + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && + (format == vk::Format::eR8Uint || format == vk::Format::eR8Unorm)) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eStencil; } @@ -180,8 +142,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .image = image.image, .viewType = info.type, .format = instance.GetSupportedFormat(format, image.format_features), - .components = - instance.GetSupportedComponentSwizzle(format, info.mapping, image.format_features), + .components = info.mapping, .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index e47f53abf..9f4bc7a7e 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample .minLod = sampler.MinLod(), .maxLod = sampler.MaxLod(), .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type), - .unnormalizedCoordinates = bool(sampler.force_unnormalized), + .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations. }; auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci); ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}", diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 153314d2b..897d6f67e 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -443,6 +443,27 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { } } + // If there is a stencil attachment, link depth and stencil. + if (desc.info.stencil_addr != 0) { + ImageId stencil_id{}; + ForEachImageInRegion(desc.info.stencil_addr, desc.info.stencil_size, + [&](ImageId image_id, Image& image) { + if (image.info.guest_address == desc.info.stencil_addr) { + stencil_id = image_id; + } + }); + if (!stencil_id) { + ImageInfo info{}; + info.guest_address = desc.info.stencil_addr; + info.guest_size_bytes = desc.info.stencil_size; + info.size = desc.info.size; + stencil_id = slot_images.insert(instance, scheduler, info); + RegisterImage(stencil_id); + } + Image& image = slot_images[stencil_id]; + image.AssociateDepth(image_id); + } + return RegisterImageView(image_id, desc.view_info); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index fc3d35e3e..94d37c993 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -172,9 +172,11 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { + case vk::Format::eR8Uint: case vk::Format::eR8Unorm: return vk::Format::eR8Uint; case vk::Format::eR4G4B4A4UnormPack16: + case vk::Format::eR5G5B5A1UnormPack16: case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: case vk::Format::eR16Unorm: @@ -211,6 +213,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: case vk::Format::eBc6HUfloatBlock: + case vk::Format::eR32G32B32A32Uint: case vk::Format::eR32G32B32A32Sfloat: return vk::Format::eR32G32B32A32Uint; default: