From 8bb1e8fcdb511f28c08924789251454315bcc617 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:17:00 +0800 Subject: [PATCH 01/31] Resize trophy pop up windows based on window size (#1769) --- src/core/libraries/np_trophy/trophy_ui.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/np_trophy/trophy_ui.cpp b/src/core/libraries/np_trophy/trophy_ui.cpp index 618f8db46..55ef7b8de 100644 --- a/src/core/libraries/np_trophy/trophy_ui.cpp +++ b/src/core/libraries/np_trophy/trophy_ui.cpp @@ -38,21 +38,22 @@ void TrophyUI::Finish() { void TrophyUI::Draw() { const auto& io = GetIO(); + float AdjustWidth = io.DisplaySize.x / 1280; + float AdjustHeight = io.DisplaySize.y / 720; const ImVec2 window_size{ - std::min(io.DisplaySize.x, 250.f), - std::min(io.DisplaySize.y, 70.f), + std::min(io.DisplaySize.x, (300 * AdjustWidth)), + std::min(io.DisplaySize.y, (70 * AdjustHeight)), }; SetNextWindowSize(window_size); SetNextWindowCollapsed(false); - SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50)); + SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight))); KeepNavHighlight(); - if (Begin("Trophy Window", nullptr, ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoInputs)) { if (trophy_icon) { - Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50)); + Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight))); ImGui::SameLine(); } else { // placeholder @@ -61,6 +62,7 @@ void TrophyUI::Draw() { GetColorU32(ImVec4{0.7f})); ImGui::Indent(60); } + SetWindowFontScale((1.2 * AdjustHeight)); TextWrapped("Trophy earned!\n%s", trophy_name.c_str()); } End(); From 32556ad0d86ea01aacb136f00a879082bcca66c0 Mon Sep 17 00:00:00 2001 From: Alexandre Bouvier Date: Sat, 14 Dec 2024 08:18:05 +0000 Subject: [PATCH 02/31] cmake: fix double alias (#1771) --- CMakeLists.txt | 4 ++-- 
externals/CMakeLists.txt | 2 +- externals/sirit | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b057f55d6..2e21a33c4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,7 +875,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") @@ -1016,4 +1016,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo") install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png") install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps") -endif() \ No newline at end of file +endif() diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index e1e67f235..dcc9d2bc0 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -110,7 +110,7 @@ if (NOT TARGET glslang::glslang) set(ENABLE_OPT OFF CACHE BOOL "") add_subdirectory(glslang) file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h") - target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") + 
target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang") endif() # Robin-map diff --git a/externals/sirit b/externals/sirit index e12b6b592..5b5ff49a5 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit e12b6b592ce9917a85303c555259488643c56f47 +Subproject commit 5b5ff49a58f5be27af1058794c6ca907dabc05b3 From a57ccf9112ec9971695cb08eeee27a876020ab19 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:18:34 +0800 Subject: [PATCH 03/31] Save main window together with config to avoid rare crash (#1772) --- src/common/config.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/config.cpp b/src/common/config.cpp index 4d07ba29f..403b0e32f 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -692,6 +692,7 @@ void save(const std::filesystem::path& path) { std::ofstream file(path, std::ios::binary); file << data; file.close(); + saveMainWindow(path); } void saveMainWindow(const std::filesystem::path& path) { From 40e8a40ada9a707b7cf3001ed1d8f835d0c7d2ad Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 00:20:04 -0800 Subject: [PATCH 04/31] externals: Add MoltenVK as an external. 
(#1767) --- .github/workflows/build.yml | 14 ++---- .gitmodules | 14 +++++- CMakeLists.txt | 3 +- documents/building-macos.md | 8 ++- externals/CMakeLists.txt | 24 +++++---- externals/MoltenVK/CMakeLists.txt | 81 +++++++++++++++++++++++++++++++ externals/MoltenVK/MoltenVK | 1 + externals/MoltenVK/SPIRV-Cross | 1 + externals/MoltenVK/cereal | 1 + 9 files changed, 119 insertions(+), 28 deletions(-) create mode 100644 externals/MoltenVK/CMakeLists.txt create mode 160000 externals/MoltenVK/MoltenVK create mode 160000 externals/MoltenVK/SPIRV-Cross create mode 160000 externals/MoltenVK/cereal diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bacfbea0d..3b5690438 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -174,11 +174,6 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - name: Cache CMake Configuration uses: actions/cache@v4 env: @@ -210,7 +205,7 @@ jobs: run: | mkdir upload mv ${{github.workspace}}/build/shadps4 upload - cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload + cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload tar cf shadps4-macos-sdl.tar.gz -C upload . 
- uses: actions/upload-artifact@v4 with: @@ -230,11 +225,8 @@ jobs: with: xcode-version: latest - - name: Install MoltenVK and Setup Qt - run: | - arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - arch -x86_64 /usr/local/bin/brew install molten-vk - - uses: jurplel/install-qt-action@v4 + - name: Setup Qt + uses: jurplel/install-qt-action@v4 with: version: 6.7.3 host: mac diff --git a/.gitmodules b/.gitmodules index 8010250a9..3d0d21c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,4 +106,16 @@ [submodule "externals/libpng"] path = externals/libpng url = https://github.com/pnggroup/libpng - shallow = true \ No newline at end of file + shallow = true +[submodule "externals/MoltenVK/SPIRV-Cross"] + path = externals/MoltenVK/SPIRV-Cross + url = https://github.com/KhronosGroup/SPIRV-Cross + shallow = true +[submodule "externals/MoltenVK/MoltenVK"] + path = externals/MoltenVK/MoltenVK + url = https://github.com/KhronosGroup/MoltenVK + shallow = true +[submodule "externals/MoltenVK/cereal"] + path = externals/MoltenVK/cereal + url = https://github.com/USCiLab/cereal + shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e21a33c4..1e54f7a00 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -894,8 +894,7 @@ if (APPLE) target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) else() # Link MoltenVK for Vulkan support - find_library(MOLTENVK MoltenVK REQUIRED) - target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) + target_link_libraries(shadps4 PRIVATE MoltenVK) endif() if (ARCHITECTURE STREQUAL "x86_64") diff --git a/documents/building-macos.md b/documents/building-macos.md index d8cc414e2..9a1a021ee 100644 --- a/documents/building-macos.md +++ b/documents/building-macos.md @@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv) brew install clang-format cmake ``` -Next, install x86_64 Homebrew and libraries. +Next, install x86_64 Qt. 
You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI. **If you are on an ARM Mac:** ``` # Installs x86_64 Homebrew to /usr/local arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" # Installs libraries. -arch -x86_64 /usr/local/bin/brew install molten-vk qt@6 +arch -x86_64 /usr/local/bin/brew install qt@6 ``` **If you are on an x86_64 Mac:** ``` -brew install molten-vk qt@6 +brew install qt@6 ``` -If you don't need the Qt GUI you can remove `qt@6` from the last command. - ### Cloning and compiling: Clone the repository recursively: diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index dcc9d2bc0..1ab23a403 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -177,15 +177,6 @@ if (NOT TARGET PNG::PNG) add_library(PNG::PNG ALIAS png_static) endif() -if (APPLE) - # date - if (NOT TARGET date::date-tz) - option(BUILD_TZ_LIB "" ON) - option(USE_SYSTEM_TZ_DB "" ON) - add_subdirectory(date) - endif() -endif() - # Dear ImGui add_library(Dear_ImGui dear_imgui/imgui.cpp @@ -232,3 +223,18 @@ if (NOT TARGET stb::headers) target_include_directories(stb INTERFACE stb) add_library(stb::headers ALIAS stb) endif() + +# Apple-only dependencies +if (APPLE) + # date + if (NOT TARGET date::date-tz) + option(BUILD_TZ_LIB "" ON) + option(USE_SYSTEM_TZ_DB "" ON) + add_subdirectory(date) + endif() + + # MoltenVK + if (NOT TARGET MoltenVK) + add_subdirectory(MoltenVK) + endif() +endif() diff --git a/externals/MoltenVK/CMakeLists.txt b/externals/MoltenVK/CMakeLists.txt new file mode 100644 index 000000000..00e3231ee --- /dev/null +++ b/externals/MoltenVK/CMakeLists.txt @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +# Prepare version information +find_package(Git) +if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + OUTPUT_VARIABLE 
MVK_GIT_REV + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +set(MVK_VERSION "1.2.12") +set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated) +file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";") + +# Find required system libraries +find_library(APPKIT_LIBRARY AppKit REQUIRED) +find_library(FOUNDATION_LIBRARY Foundation REQUIRED) +find_library(IOKIT_LIBRARY IOKit REQUIRED) +find_library(IOSURFACE_LIBRARY IOSurface REQUIRED) +find_library(METAL_LIBRARY Metal REQUIRED) +find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED) + +# cereal +option(SKIP_PORTABILITY_TEST "" ON) +option(BUILD_DOC "" OFF) +option(BUILD_SANDBOX "" OFF) +option(SKIP_PERFORMANCE_COMPARISON "" ON) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(cereal) + +# SPIRV-Cross +option(SPIRV_CROSS_CLI "" OFF) +option(SPIRV_CROSS_ENABLE_TESTS "" OFF) +option(SPIRV_CROSS_ENABLE_HLSL "" OFF) +option(SPIRV_CROSS_ENABLE_CPP "" OFF) +option(SPIRV_CROSS_SKIP_INSTALL "" ON) +add_subdirectory(SPIRV-Cross) + +# Common +set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common) +file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS + ${MVK_COMMON_DIR}/*.cpp + ${MVK_COMMON_DIR}/*.m + ${MVK_COMMON_DIR}/*.mm) +set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR}) + +add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES}) +target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES}) +target_compile_options(MoltenVKCommon PRIVATE -w) + +# MoltenVKShaderConverter +set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter) +file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m + ${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm) +set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include) + 
+add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES}) +target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES}) +target_compile_options(MoltenVKShaderConverter PRIVATE -w) +target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon) +target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1) + +# MoltenVK +set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK) +file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS + ${MVK_DIR}/MoltenVK/*.cpp + ${MVK_DIR}/MoltenVK/*.m + ${MVK_DIR}/MoltenVK/*.mm) +file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*) +set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include) + +add_library(MoltenVK SHARED ${MVK_SOURCES}) +target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES}) +target_compile_options(MoltenVK PRIVATE -w) +target_link_libraries(MoltenVK PRIVATE + ${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY} + Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter) +target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1) diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK new file mode 160000 index 000000000..5ad3ee5d2 --- /dev/null +++ b/externals/MoltenVK/MoltenVK @@ -0,0 +1 @@ +Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 diff --git a/externals/MoltenVK/SPIRV-Cross b/externals/MoltenVK/SPIRV-Cross new file mode 160000 index 000000000..6173e24b3 --- /dev/null +++ b/externals/MoltenVK/SPIRV-Cross @@ -0,0 +1 @@ +Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6 diff --git a/externals/MoltenVK/cereal b/externals/MoltenVK/cereal new file mode 160000 index 000000000..d1fcec807 --- /dev/null +++ b/externals/MoltenVK/cereal @@ -0,0 +1 @@ +Subproject commit 
d1fcec807b372f04e4c1041b3058e11c12853e6e From 8caca4df32c05a11af8351590dcfa0fa5266eb11 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 02:03:42 -0800 Subject: [PATCH 05/31] shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP (#1770) * shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP * emit_spirv: Fix missing extension declaration. --- .../backend/spirv/emit_spirv.cpp | 4 ++++ .../backend/spirv/emit_spirv_image.cpp | 17 ++++++++++++----- .../backend/spirv/emit_spirv_instructions.h | 8 ++++---- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_memory.cpp | 11 ++++++++--- src/shader_recompiler/ir/ir_emitter.cpp | 17 +++++++++-------- src/shader_recompiler/ir/ir_emitter.h | 10 ++++++---- src/shader_recompiler/ir/opcodes.inc | 6 +++--- .../ir/passes/resource_tracking_pass.cpp | 8 +++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 6 ++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 13 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 23800fc49..ab9d6afae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -222,6 +222,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + if (profile.supports_image_load_store_lod) { + ctx.AddExtension("SPV_AMD_shader_image_load_store_lod"); + ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD); + } } if (info.has_texel_buffers) { 
ctx.AddCapability(spv::Capability::SampledBuffer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 736410dcd..8da9280d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels; } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); @@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id return texture.is_integer ? 
ctx.OpBitcast(ctx.F32[4], sample) : sample; } -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) { UNREACHABLE_MSG("SPIR-V Instruction"); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id color_type = texture.data_types->Get(4); - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color)); + ImageOperands operands; + if (ctx.profile.supports_image_load_store_lod) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } else if (lod.value != 0) { + LOG_WARNING(Render, "Image write with LOD not supported by driver"); + } + ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, + operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 4ff53670e..057b0d692 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -392,14 +392,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, 
IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color); Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f320a6c7..198cea276 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -276,7 +276,7 @@ public: // Image Memory // MIMG void IMAGE_LOAD(bool has_mip, const GcnInst& inst); - void IMAGE_STORE(const GcnInst& inst); + void IMAGE_STORE(bool has_mip, const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 74b9c905d..eadd1c4db 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer store operations case Opcode::IMAGE_STORE: - return IMAGE_STORE(inst); + return IMAGE_STORE(false, inst); + case Opcode::IMAGE_STORE_MIP: + return IMAGE_STORE(true, inst); // Image misc operations case Opcode::IMAGE_GET_RESINFO: @@ -423,7 
+425,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { } } -void Translator::IMAGE_STORE(const GcnInst& inst) { +void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg data_reg{inst.dst[0].code}; @@ -434,6 +436,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + IR::TextureInstInfo info{}; + info.has_lod.Assign(has_mip); + boost::container::static_vector comps; for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { @@ -443,7 +448,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { comps.push_back(ir.GetVectorReg(data_reg++)); } const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); - ir.ImageWrite(handle, body, value, {}); + ir.ImageWrite(handle, body, {}, value, info); } void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 29b406699..3ebc82e64 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1599,9 +1599,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } -Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info) { - return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling); +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, TextureInstInfo info) { + return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, lod, offset, multisampling); } Value 
IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, @@ -1625,13 +1625,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, offset, lod_clamp); } -Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { - return Inst(Opcode::ImageRead, Flags{info}, handle, coords); +Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info) { + return Inst(Opcode::ImageRead, Flags{info}, handle, coords, lod); } -void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { - Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod, + const Value& color, TextureInstInfo info) { + Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color); } // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f77e22b82..068aba14d 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -314,14 +314,16 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, + TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivatives_dx, const Value& derivatives_dy, const Value& offset, const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, 
TextureInstInfo info); - void ImageWrite(const Value& handle, const Value& coords, const Value& color, + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info); + void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color, TextureInstInfo info); void EmitVertex(); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 8f40ed985..477275824 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -334,12 +334,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 398579ad4..f436db07a 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -771,14 +771,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.SetArg(1, coords); if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2))); + inst.SetArg(3, SwizzleVector(ir, image, 
inst.Arg(3))); } if (inst_info.has_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || + inst.GetOpcode() == IR::Opcode::ImageRead || + inst.GetOpcode() == IR::Opcode::ImageWrite); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); - inst.SetArg(3, arg); + inst.SetArg(2, arg); } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { inst.SetArg(4, arg); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 96c458d44..c00e37f9c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -23,6 +23,7 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; + bool supports_image_load_store_lod{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb60..2f9695055 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -267,6 +267,7 @@ bool Instance::CreateDevice() { list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); + image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. 
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9cc..2b4bd612f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -158,6 +158,11 @@ public: return legacy_vertex_attributes; } + /// Returns true when VK_AMD_shader_image_load_store_lod is supported. + bool IsImageLoadStoreLodSupported() const { + return image_load_store_lod; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -327,6 +332,7 @@ private: bool maintenance5{}; bool list_restart{}; bool legacy_vertex_attributes{}; + bool image_load_store_lod{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0fa77e19b..ff27b742f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -172,6 +172,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), + .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; From 3e226225080cca81693e034fdb0f0d0b30b8d4dd Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 02:04:30 -0800 Subject: [PATCH 06/31] renderer_vulkan: Remove some fallbacks and misc format queries that are no longer needed. 
(#1773) --- .../renderer_vulkan/vk_instance.cpp | 26 +++---------------- src/video_core/renderer_vulkan/vk_instance.h | 4 --- src/video_core/texture_cache/image_view.cpp | 3 +-- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2f9695055..e844150b2 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -68,11 +68,10 @@ std::unordered_map GetFormatProperties( } // Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility static constexpr std::array misc_formats = { - vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm, - vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint, - vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb, - vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint, + vk::Format::eA2R10G10B10UnormPack32, + vk::Format::eB8G8R8A8Unorm, + vk::Format::eB8G8R8A8Srgb, + vk::Format::eD24UnormS8Uint, }; for (const auto& format : misc_formats) { if (!format_properties.contains(format)) { @@ -583,8 +582,6 @@ bool Instance::IsFormatSupported(const vk::Format format, static vk::Format GetAlternativeFormat(const vk::Format format) { switch (format) { - case vk::Format::eB5G6R5UnormPack16: - return vk::Format::eR5G6B5UnormPack16; case vk::Format::eD16UnormS8Uint: return vk::Format::eD24UnormS8Uint; default: @@ -604,19 +601,4 @@ vk::Format Instance::GetSupportedFormat(const vk::Format format, return format; } -vk::ComponentMapping Instance::GetSupportedComponentSwizzle( - const vk::Format format, const vk::ComponentMapping swizzle, - const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return swizzle; - } - - vk::ComponentMapping supported_swizzle = swizzle; - if (format == vk::Format::eB5G6R5UnormPack16) { - // B5G6R5 -> R5G6B5 - 
std::swap(supported_swizzle.r, supported_swizzle.b); - } - return supported_swizzle; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 2b4bd612f..54a9b9873 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -33,10 +33,6 @@ public: [[nodiscard]] vk::Format GetSupportedFormat(vk::Format format, vk::FormatFeatureFlags2 flags) const; - /// Re-orders a component swizzle for format compatibility, if needed. - [[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle( - vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const; - /// Returns the Vulkan instance vk::Instance GetInstance() const { return *instance; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index cc467e9a4..41c45019e 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -141,8 +141,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .image = image.image, .viewType = info.type, .format = instance.GetSupportedFormat(format, image.format_features), - .components = - instance.GetSupportedComponentSwizzle(format, info.mapping, image.format_features), + .components = info.mapping, .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, From 3c0c921ef5006f1d30eac356e72edb6140d1da1e Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sat, 14 Dec 2024 02:56:17 -0800 Subject: [PATCH 07/31] Tessellation (#1528) * shader_recompiler: Tessellation WIP * fix compiler errors after merge DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff * WIP Tessellation partial implementation. 
Squash commits * test: make local/tcs use attr arrays * attr arrays in TCS/TES * dont define empty attr arrays * switch to special opcodes for tess tcs/tes reads and tcs writes * impl tcs/tes read attr insts * rebase fix * save some work * save work probably broken and slow * put Vertex LogicalStage after TCS and TES to fix bindings * more refactors * refactor pattern matching and optimize modulos (disabled) * enable modulo opt * copyright * rebase fixes * remove some prints * remove some stuff * Add TCS/TES support for shader patching and use LogicalStage * refactor and handle wider DS instructions * get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR * stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs * delete some more stuff * update comments for current implementation * some cleanup * uint error * more cleanup * remove patch control points dynamic state (because runtime_info already depends on it) * fix potential problem with determining passthrough --------- Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- CMakeLists.txt | 3 + src/core/debug_state.cpp | 9 +- src/core/debug_state.h | 20 +- src/core/devtools/widget/shader_list.cpp | 17 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 7 +- .../backend/spirv/emit_spirv.cpp | 74 +- .../backend/spirv/emit_spirv_barriers.cpp | 13 +- .../spirv/emit_spirv_context_get_set.cpp | 146 +++- .../backend/spirv/emit_spirv_instructions.h | 9 +- .../backend/spirv/spirv_emit_context.cpp | 149 +++- .../backend/spirv/spirv_emit_context.h | 26 +- src/shader_recompiler/frontend/tessellation.h | 38 + .../frontend/translate/data_share.cpp | 9 +- .../frontend/translate/scalar_alu.cpp | 10 +- .../frontend/translate/translate.cpp | 41 +- .../frontend/translate/translate.h | 5 +- .../frontend/translate/vector_alu.cpp | 10 +- .../frontend/translate/vector_memory.cpp | 12 
+- src/shader_recompiler/info.h | 22 +- src/shader_recompiler/ir/attribute.cpp | 12 + src/shader_recompiler/ir/attribute.h | 14 +- src/shader_recompiler/ir/basic_block.cpp | 2 + src/shader_recompiler/ir/ir_emitter.cpp | 35 +- src/shader_recompiler/ir/ir_emitter.h | 15 +- src/shader_recompiler/ir/microinstruction.cpp | 2 + src/shader_recompiler/ir/opcodes.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 4 + .../ir/passes/constant_propagation_pass.cpp | 26 +- .../ir/passes/constant_propogation.h | 4 + .../ir/passes/hull_shader_transform.cpp | 744 ++++++++++++++++++ src/shader_recompiler/ir/passes/ir_passes.h | 3 + .../ir/passes/ring_access_elimination.cpp | 45 +- .../ir/passes/shader_info_collection_pass.cpp | 16 + src/shader_recompiler/ir/patch.cpp | 28 + src/shader_recompiler/ir/patch.h | 173 ++++ src/shader_recompiler/ir/pattern_matching.h | 127 +++ src/shader_recompiler/ir/reg.h | 3 +- src/shader_recompiler/ir/type.h | 2 +- src/shader_recompiler/ir/value.cpp | 2 + src/shader_recompiler/ir/value.h | 9 + src/shader_recompiler/recompiler.cpp | 26 +- src/shader_recompiler/recompiler.h | 2 +- src/shader_recompiler/runtime_info.h | 78 +- src/shader_recompiler/specialization.h | 12 + src/video_core/amdgpu/liverpool.h | 49 +- src/video_core/amdgpu/types.h | 95 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 32 +- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_instance.cpp | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 105 ++- .../renderer_vulkan/vk_pipeline_cache.h | 18 +- .../renderer_vulkan/vk_pipeline_common.h | 10 +- .../renderer_vulkan/vk_rasterizer.cpp | 13 +- 54 files changed, 2146 insertions(+), 189 deletions(-) create mode 100644 src/shader_recompiler/frontend/tessellation.h create mode 100644 src/shader_recompiler/ir/passes/constant_propogation.h create mode 100644 src/shader_recompiler/ir/passes/hull_shader_transform.cpp create mode 100644 src/shader_recompiler/ir/patch.cpp 
create mode 100644 src/shader_recompiler/ir/patch.h create mode 100644 src/shader_recompiler/ir/pattern_matching.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e54f7a00..78d8421a3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/constant_propagation_pass.cpp src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp + src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/patch.cpp + src/shader_recompiler/ir/patch.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 649624924..c68fd469d 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -177,10 +177,11 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, } } -void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module, - std::span spv, std::span raw_code, - std::span patch_spv, bool is_patched) { - shader_dump_list.emplace_back(name, module, std::vector{spv.begin(), spv.end()}, +void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, + std::span raw_code, std::span patch_spv, + bool is_patched) { + shader_dump_list.emplace_back(name, l_stage, module, std::vector{spv.begin(), spv.end()}, std::vector{raw_code.begin(), raw_code.end()}, std::vector{patch_spv.begin(), 
patch_spv.end()}, is_patched); } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index fa2e5cd9d..0db5bc468 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -76,6 +76,7 @@ struct FrameDump { struct ShaderDump { std::string name; + Shader::LogicalStage l_stage; vk::ShaderModule module; std::vector spv; @@ -90,16 +91,17 @@ struct ShaderDump { std::string cache_isa_disasm{}; std::string cache_patch_disasm{}; - ShaderDump(std::string name, vk::ShaderModule module, std::vector spv, - std::vector isa, std::vector patch_spv, bool is_patched) - : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)), - patch_spv(std::move(patch_spv)), is_patched(is_patched) {} + ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module, + std::vector spv, std::vector isa, std::vector patch_spv, + bool is_patched) + : name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)), + isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {} ShaderDump(const ShaderDump& other) = delete; ShaderDump(ShaderDump&& other) noexcept - : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)}, - isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)}, - patch_source{std::move(other.patch_source)}, + : name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)}, + spv{std::move(other.spv)}, isa{std::move(other.isa)}, + patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)}, cache_spv_disasm{std::move(other.cache_spv_disasm)}, cache_isa_disasm{std::move(other.cache_isa_disasm)}, cache_patch_disasm{std::move(other.cache_patch_disasm)} {} @@ -108,6 +110,7 @@ struct ShaderDump { if (this == &other) return *this; name = std::move(other.name); + l_stage = other.l_stage; module = std::move(other.module); spv = std::move(other.spv); isa = std::move(other.isa); @@ -203,7 +206,8 @@ public: void 
PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); - void CollectShader(const std::string& name, vk::ShaderModule module, std::span spv, + void CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); }; diff --git a/src/core/devtools/widget/shader_list.cpp b/src/core/devtools/widget/shader_list.cpp index 80c939718..2c97db7fd 100644 --- a/src/core/devtools/widget/shader_list.cpp +++ b/src/core/devtools/widget/shader_list.cpp @@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) { DebugState.ShowDebugMessage(msg); } if (compile) { - static std::map stage_arg = { - {"vs", "vert"}, - {"gs", "geom"}, - {"fs", "frag"}, - {"cs", "comp"}, + static std::map stage_arg = { + {Shader::LogicalStage::Vertex, "vert"}, + {Shader::LogicalStage::TessellationControl, "tesc"}, + {Shader::LogicalStage::TessellationEval, "tese"}, + {Shader::LogicalStage::Geometry, "geom"}, + {Shader::LogicalStage::Fragment, "frag"}, + {Shader::LogicalStage::Compute, "comp"}, }; - auto stage = stage_arg.find(value.name.substr(0, 2)); + auto stage = stage_arg.find(value.l_stage); if (stage == stage_arg.end()) { - DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} + - value.name.substr(0, 2)); + DebugState.ShowDebugMessage(std::string{"Invalid shader stage"}); } else { std::string cmd = fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 " diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index dbf085fb3..e85b8b890 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1642,7 +1642,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) { 
LOG_TRACE(Lib_GnmDriver, "called"); - if (!cmdbuf || size < 0x1E) { return -1; } @@ -1660,11 +1659,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], - hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, + hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL + hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG + // right padding? WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ab9d6afae..e545e8e36 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include #include #include @@ -13,6 +12,7 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { @@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.VectorReg(); } else if constexpr (std::is_same_v) { return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } + UNREACHABLE(); } template @@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } +spv::ExecutionMode 
ExecutionMode(AmdGpu::TessellationType primitive) { + switch (primitive) { + case AmdGpu::TessellationType::Isoline: + return spv::ExecutionMode::Isolines; + case AmdGpu::TessellationType::Triangle: + return spv::ExecutionMode::Triangles; + case AmdGpu::TessellationType::Quad: + return spv::ExecutionMode::Quads; + } + UNREACHABLE_MSG("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { + switch (spacing) { + case AmdGpu::TessellationPartitioning::Integer: + return spv::ExecutionMode::SpacingEqual; + case AmdGpu::TessellationPartitioning::FracOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case AmdGpu::TessellationPartitioning::FracEven: + return spv::ExecutionMode::SpacingFractionalEven; + default: + break; + } + UNREACHABLE_MSG("Tessellation spacing {}", spacing); +} + void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); @@ -248,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } - if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + const auto stage = info.l_stage; + if (stage == LogicalStage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.stage == Stage::Geometry) { + if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } -void DefineEntryPoint(const IR::Program& 
program, EmitContext& ctx, Id main) { - const auto& info = program.info; +void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.info.stage) { - case Stage::Compute: { + switch (info.l_stage) { + case LogicalStage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Stage::Export: - case Stage::Vertex: + case LogicalStage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; - case Stage::Fragment: + case LogicalStage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.hs_info.NumOutputControlPoints()); + break; + case LogicalStage::TessellationEval: { + execution_model = spv::ExecutionModel::TessellationEvaluation; + const auto& vs_info = ctx.runtime_info.vs_info; + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, + vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw + ? 
spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); + break; + } + case LogicalStage::Fragment: execution_model = spv::ExecutionModel::Fragment; if (ctx.profile.lower_left_origin_mode) { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); @@ -292,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; - case Stage::Geometry: + case LogicalStage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); ctx.AddExecutionMode(main, @@ -303,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.runtime_info.gs_info.num_invocations); break; default: - throw NotImplementedException("Stage {}", u32(program.info.stage)); + UNREACHABLE_MSG("Stage {}", u32(info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -349,7 +397,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); + DefineEntryPoint(program.info, ctx, main); SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 22b3523aa..611225e8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { void EmitBarrier(EmitContext& ctx) { const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Workgroup}; - const auto 
memory_semantics{spv::MemorySemanticsMask::AcquireRelease | - spv::MemorySemanticsMask::WorkgroupMemory}; + spv::Scope memory; + spv::MemorySemanticsMask memory_semantics; + if (ctx.l_stage == Shader::LogicalStage::TessellationControl) { + memory = spv::Scope::Invocation; + memory_semantics = spv::MemorySemanticsMask::MaskNone; + } else { + memory = spv::Scope::Workgroup; + memory_semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory; + } ctx.OpControlBarrier(ctx.ConstU32(static_cast(execution)), ctx.ConstU32(static_cast(memory)), ctx.ConstU32(static_cast(memory_semantics))); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d005169c4..f3db6af56 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/runtime_info.h" #include @@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - ASSERT(info.num_components > 0); - if (info.num_components == 1) { - return info.id; + const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index), + ctx.ConstU32(element)); } else { 
- return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + const auto& info{ctx.output_params.at(attr_index)}; + ASSERT(info.num_components > 0); + if (info.num_components == 1) { + return info.id; + } else { + return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + } } } if (IR::IsMrt(attr)) { @@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - return {info.component_type, info.is_integer}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + return {ctx.F32[1], false}; + } else { + const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; + const auto& info{ctx.output_params.at(index)}; + return {info.component_type, info.is_integer}; + } } if (IR::IsMrt(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)}; @@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? 
ctx.u32_zero_value : ctx.u32_one_value)); } -Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { +Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { if (IR::IsPosition(attr)) { ASSERT(attr == IR::Attribute::Position0); const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; + const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; const auto param = ctx.input_params.at(param_id).id; const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u UNREACHABLE(); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.stage == Stage::Geometry) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { + if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); + } else if (ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval) { + if 
(IR::IsTessCoord(attr)) { + const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + const auto pointer{ + ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); } if (IR::IsParam(attr)) { @@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { } return coord; } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); default: - throw NotImplementedException("Read attribute {}", attr); + UNREACHABLE_MSG("Read attribute {}", attr); } } @@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case 
IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. + // If Gcn shader uses this value, we should make sure all threads in the + // Vulkan shader use 0 + return ctx.ConstU32(0u); + } else { + const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); + } + } default: - throw NotImplementedException("Read U32 attribute {}", attr); + UNREACHABLE_MSG("Read U32 attribute {}", attr); } } @@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen } } +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array, + vertex_index, attr_index, comp_index)); +} + +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { + // Implied vertex index is invocation_id + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + Id pointer = + ctx.OpAccessChain(component_ptr, ctx.output_attr_array, + ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index); + ctx.OpStore(pointer, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + const Id type{ctx.l_stage 
== LogicalStage::TessellationControl ? ctx.output_f32 + : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)); + default: + UNREACHABLE_MSG("Patch {}", u32(patch)); + } + }()}; + ctx.OpStore(pointer, value); +} + template static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 057b0d692..f71c61af6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -9,6 +9,7 @@ namespace Shader::IR { enum class Attribute : u64; enum class ScalarReg : u32; +enum class Patch : u64; class Inst; class Value; } // namespace Shader::IR @@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void 
EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 1ada2f1f9..2e09e70a7 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/ir/passes/srt.h" +#include "shader_recompiler/runtime_info.h" #include 
"video_core/amdgpu/types.h" #include @@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) { case Stage::Compute: return "cs"; } - throw InvalidArgument("Invalid stage {}", u32(stage)); + UNREACHABLE_MSG("Invalid hw stage {}", u32(stage)); } static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { @@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, - profile{profile_}, stage{info.stage}, binding{binding_} { + profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -268,9 +269,8 @@ void EmitContext::DefineInputs() { U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); @@ -311,12 +311,11 @@ void EmitContext::DefineInputs() { } input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); - interfaces.push_back(id); } } break; } - case Stage::Fragment: + case LogicalStage::Fragment: frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -351,15 +350,14 @@ void EmitContext::DefineInputs() { } 
input_params[semantic] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); - interfaces.push_back(attr_id); } break; - case Stage::Compute: + case LogicalStage::Compute: workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input); local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; - case Stage::Geometry: { + case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), @@ -389,15 +387,129 @@ void EmitContext::DefineInputs() { } break; } + case LogicalStage::TessellationControl: { + invocation_id = + DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + break; + } + case LogicalStage::TessellationEval: { + tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id 
patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + + u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineInput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_in{}", index)); + patches[index] = id; + } + break; + } default: break; } } void EmitContext::DefineOutputs() { - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { + // No point in defining builtin outputs (i.e. position) unless next stage is fragment? + // Might cause problems linking with tcs + + output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || + info.stores.Get(IR::Attribute::Position2) || + info.stores.Get(IR::Attribute::Position3); + if (has_extra_pos_stores) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + clip_distances = + DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output); + cull_distances = + DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); + } + if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) { + const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id type{TypeArray(F32[4], ConstU32(num_attrs))}; + output_attr_array = DefineOutput(type, 0); + Name(output_attr_array, "out_attrs"); + } + } else { + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, 
fmt::format("out_attr{}", i)); + output_params[i] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); + } + } + break; + } + case LogicalStage::TessellationControl: { + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], ConstU32(4U))}; + output_tess_level_outer = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], ConstU32(2U))}; + output_tess_level_inner = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + + const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray( + per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; + output_attr_array = DefineOutput(patch_array_type, 0); + Name(output_attr_array, "out_attrs"); + } + + u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineOutput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_out{}", index)); + patches[index] = id; + } + break; + } + case LogicalStage::TessellationEval: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || info.stores.Get(IR::Attribute::Position2) || @@ -419,11 +531,10 @@ void EmitContext::DefineOutputs() { Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); - 
interfaces.push_back(id); } break; } - case Stage::Fragment: + case LogicalStage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -435,22 +546,22 @@ void EmitContext::DefineOutputs() { const Id id{DefineOutput(type, i)}; Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); - interfaces.push_back(id); } break; - case Stage::Geometry: { + case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; - interfaces.push_back(id); } break; } - default: + case LogicalStage::Compute: break; + default: + UNREACHABLE(); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index cd1293328..583d96b99 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -46,14 +46,18 @@ public: void DefineBufferOffsets(); void DefineInterpolatedAttribs(); - [[nodiscard]] Id DefineInput(Id type, u32 location) { - const Id input_id{DefineVar(type, spv::StorageClass::Input)}; - Decorate(input_id, spv::Decoration::Location, location); + [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)}; + if (location) { + Decorate(input_id, spv::Decoration::Location, *location); + } return input_id; } - [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt) { - const Id output_id{DefineVar(type, spv::StorageClass::Output)}; + [[nodiscard]] Id 
DefineOutput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)}; if (location) { Decorate(output_id, spv::Decoration::Location, *location); } @@ -131,7 +135,8 @@ public: const Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - Stage stage{}; + Stage stage; + LogicalStage l_stage{}; Id void_id{}; Id U8{}; @@ -188,8 +193,15 @@ public: Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + Id tess_coord; + std::array patches{}; + Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; @@ -252,6 +264,8 @@ public: bool is_loaded{}; s32 buffer_handle{-1}; }; + Id input_attr_array; + Id output_attr_array; std::array input_params{}; std::array output_params{}; std::array frag_outputs{}; diff --git a/src/shader_recompiler/frontend/tessellation.h b/src/shader_recompiler/frontend/tessellation.h new file mode 100644 index 000000000..bfcaa4fdc --- /dev/null +++ b/src/shader_recompiler/frontend/tessellation.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Shader { + +struct TessellationDataConstantBuffer { + u32 ls_stride; + u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride : + // ls_stride + u32 num_patches; // num patches submitted in threadgroup + u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ? 
+ // HullStateConstants::m_numInputCP * ls_stride * num_patches : 0 + // basically 0 when passthrough + u32 patch_const_size; // 16 * num_patch_attrs + u32 patch_const_base; // hs_output_base + patch_output_size + u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch + f32 off_chip_tessellation_factor_threshold; + u32 first_edge_tess_factor_index; +}; + +// Assign names to dword fields of TessellationDataConstantBuffer +enum class TessConstantAttribute : u32 { + LsStride, + HsCpStride, + HsNumPatch, + HsOutputBase, + PatchConstSize, + PatchConstBase, + PatchOutputSize, + OffChipTessellationFactorThreshold, + FirstEdgeTessFactorIndex, +}; + +} // namespace Shader \ No newline at end of file diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 5914f9fe3..116935b94 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83e..1ef0d82d8 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ 
b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE(inst, true); case Opcode::S_BFE_U32: - return S_BFE_U32(inst); + return S_BFE(inst, false); case Opcode::S_ABSDIFF_I32: return S_ABSDIFF_I32(inst); @@ -434,12 +438,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } -void Translator::S_BFE_U32(const GcnInst& inst) { +void Translator::S_BFE(const GcnInst& inst, bool is_signed) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; - const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)}; SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 97978ff6b..3031e6643 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -34,9 
+36,8 @@ void Translator::EmitPrologue() { } IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (info.stage) { - case Stage::Vertex: - case Stage::Export: + switch (info.l_stage) { + case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -52,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -122,7 +123,30 @@ void Translator::EmitPrologue() { } } break; - case Stage::Compute: + case LogicalStage::TessellationControl: { + // Should be laid out like: + // [0:8]: patch id within VGT + // [8:12]: output control point id + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // TODO PrimitiveId is probably V2 but haven't seen it yet + break; + } + case LogicalStage::TessellationEval: + ir.SetVectorReg(IR::VectorReg::V0, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); + // V2 is similar to PrimitiveID but not the same. It seems to only be used in + // compiler-generated address calculations. Its probably the patch id within the + // patches running locally on a given VGT (or CU, whichever is the granularity of LDS + // memory) + // Set to 0. See explanation in comment describing hull/domain passes + ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); + // V3 is the actual PrimitiveID as intended by the shader author. 
+ ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); @@ -137,7 +161,7 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; - case Stage::Geometry: + case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 @@ -152,7 +176,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: - throw NotImplementedException("Unknown shader stage"); + UNREACHABLE_MSG("Unknown shader stage"); } } @@ -503,7 +527,8 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. 
if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || + info.stage == Stage::Local); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 198cea276..60bad1864 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -94,7 +94,8 @@ public: void S_ASHR_I32(const GcnInst& inst); void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); - void S_BFE_U32(const GcnInst& inst); + void S_BFE(const GcnInst& inst, bool is_signed); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); @@ -217,7 +218,7 @@ public: // VOP3a void V_MAD_F32(const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); void V_MAD_U32_U24(const GcnInst& inst); void V_CUBEID_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3e9e677a7..2b32ca2ce 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + IR::U32 src1{GetSrc(inst.src[1])}; + IR::U32 src2{GetSrc(inst.src[2])}; + if (!src1.IsImmediate()) { + src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F)); + } + if (!src2.IsImmediate()) { 
+ src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F)); + } SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index eadd1c4db..072b1f88e 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -189,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -247,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; - if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) { ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); @@ -269,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); 
const auto nfmt = static_cast(mtbuf.nfmt); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 494bbb4bb..dbea2af8a 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -11,6 +11,7 @@ #include "common/types.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/copy_shader.h" +#include "shader_recompiler/frontend/tessellation.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/reg.h" @@ -163,6 +164,7 @@ struct Info { UserDataMask ud_mask{}; CopyShaderData gs_copy_data; + u32 uses_patches{}; BufferResourceList buffers; TextureBufferResourceList texture_buffers; @@ -173,8 +175,12 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + s32 tess_consts_dword_offset = -1; + std::span user_data; Stage stage; + LogicalStage l_stage; u64 pgm_hash{}; VAddr pgm_base; @@ -190,14 +196,16 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; - explicit Info(Stage stage_, ShaderParams params) - : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -244,6 +252,16 @@ struct Info { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } } + + void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const { + ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD + auto buf = ReadUdReg(static_cast(tess_consts_ptr_base), + 
static_cast(tess_consts_dword_offset)); + VAddr tess_constants_addr = buf.base_address; + memcpy(&tess_constants, + reinterpret_cast(tess_constants_addr), + sizeof(tess_constants)); + } }; constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e219dfb64..6a267e21b 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PatchVertices: + return "PatchVertices"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPointU"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPointV"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0890e88f1..bcb2b44a9 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,8 +72,13 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PatchVertices = 81, + TessellationEvaluationPointU = 82, + TessellationEvaluationPointV = 83, + PackedHullInvocationInfo = 84, // contains patch id 
within the VGT and invocation ID Max, }; @@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept { return attribute >= Attribute::Position0 && attribute <= Attribute::Position3; } +constexpr bool IsTessCoord(Attribute attribute) noexcept { + return attribute >= Attribute::TessellationEvaluationPointU && + attribute <= Attribute::TessellationEvaluationPointV; +} + constexpr bool IsParam(Attribute attribute) noexcept { return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; } diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index b4d1a78c7..a312eabde 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map& inst_to_index, size return fmt::format("{}", arg.VectorReg()); case Type::Attribute: return fmt::format("{}", arg.Attribute()); + case Type::Patch: + return fmt::format("{}", arg.Patch()); default: return ""; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3ebc82e64..21df53391 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), index); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { @@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); } +F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return 
Inst(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index); +} + +void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, + const U32& comp_index) { + Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { case 32: @@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } } +Value IREmitter::CompositeConstruct(std::span elements) { + switch (elements.size()) { + case 2: + return CompositeConstruct(elements[0], elements[1]); + case 3: + return CompositeConstruct(elements[0], elements[1], elements[2]); + case 4: + return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]); + default: + UNREACHABLE_MSG("Composite construct with greater than 4 elements"); + } +} + Value IREmitter::CompositeExtract(const Value& vector, size_t element) { const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 068aba14d..95713565b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -10,6 +10,7 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -80,10 +81,18 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, + IR::Value index = IR::Value(u32(0u))); [[nodiscard]] U32 
GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); + [[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); @@ -138,6 +147,8 @@ public: [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); + [[nodiscard]] Value CompositeConstruct(std::span values); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); @@ -335,6 +346,7 @@ private: template T Inst(Opcode op, Args... 
args) { auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + it->SetParent(block); return T{Value{&*it}}; } @@ -352,6 +364,7 @@ private: u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + it->SetParent(block); return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 9b4ad63d2..6e7bbe661 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: + case Opcode::SetTcsGenericAttribute: + case Opcode::SetPatch: case Opcode::StoreBufferU32: case Opcode::StoreBufferU32x2: case Opcode::StoreBufferU32x3: diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index be640297a..cd73ace7e 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque}; constexpr Type ScalarReg{Type::ScalarReg}; constexpr Type VectorReg{Type::VectorReg}; constexpr Type Attribute{Type::Attribute}; -constexpr Type SystemValue{Type::SystemValue}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 477275824..470f9fbe5 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32, OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, 
) +OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 9624ce6a5..16b07e1a1 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } } +template +void FoldMul(IR::Block& block, IR::Inst& inst) { + if (!FoldCommutative(inst, [](T a, T b) { return a * b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg(rhs) == 0) { + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + return; + } +} + void FoldCmpClass(IR::Block& block, IR::Inst& inst) { ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); const auto class_mask = static_cast(inst.Arg(1).U32()); @@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { FoldWhenAllImmediates(inst, [](u32 a) { return static_cast(a); }); return; case IR::Opcode::IMul32: - FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + FoldMul(block, inst); + return; + case IR::Opcode::UDiv32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0"); + return a / b; + }); + return; + case IR::Opcode::UMod32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0"); + return a % b; + }); return; case IR::Opcode::FPCmpClass32: FoldCmpClass(block, inst); diff --git a/src/shader_recompiler/ir/passes/constant_propogation.h b/src/shader_recompiler/ir/passes/constant_propogation.h new file mode 100644 index 000000000..313a3cc6a --- /dev/null +++ b/src/shader_recompiler/ir/passes/constant_propogation.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + 
+#pragma once \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp new file mode 100644 index 000000000..5cf02b6d0 --- /dev/null +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -0,0 +1,744 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/pattern_matching.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Optimization { + +/** + * Tessellation shaders pass outputs to the next shader using LDS. + * The Hull shader stage receives input control points stored in LDS. + * + * These passes attempt to resolve LDS accesses to attribute accesses and correctly + * write to the tessellation factor tables. + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 + * - ... + * - TCS outputs for patch 0 + * - TCS outputs for patch 1 + * - TCS outputs for patch 2 + * - ... + * - PatchConst TCS outputs for patch 0 + * - PatchConst TCS outputs for patch 1 + * - PatchConst TCS outputs for patch 2 + * + * + * If the Hull stage does not write any new control points the driver will + * optimize LDS layout so input and output control point spaces overlap. + * (Passthrough) + * + * The gnm driver requires a V# holding special constants to be bound + * for reads by the shader. + * The Hull and Domain shaders read values from this buffer which + * contain size and offset information required to address input, output, + * or PatchConst attributes within the current patch. 
+ * See the TessellationDataConstantBuffer struct to see the layout of this V#. + * + * Tessellation factors are stored to a special tessellation factor V# that is automatically bound + * by the driver. This is the input to the fixed function tessellator that actually subdivides the + * domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull + * shader. + * The offset into the tess factor buffer determines which factor the shader is writing. + * Additionally, most hull shaders seem to redundantly write tess factors to PatchConst + * attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes. + * + * LDS reads in the Hull shader can be from input control points, and in the Domain shader can + * be hs output control points (output from the perspective of the Hull shader) and patchconst + * values. + * LDS stores in the Hull shader can either be output control point writes or per-patch + * (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless its + * followed by the geometry stage (but we haven't seen this yet), so nothing special there. + * The address calculations can vary significantly and can't be easily pattern matched. We are at + * the mercy of instruction selection the ps4 compiler wanted to use. 
+ * Generally though, they could look something like this: + * Input control point: + * addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride + * + attr# * 16 + component + * Output control point: + * addr = #patches * input_cp_stride * #input_cp_per_patch + * + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride + + attr# * 16 + component + * Per patch output: + * addr = #patches * input_cp_stride * #cp_per_input_patch + * + #patches * output_patch_stride + * + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component + * + * output_patch_stride and output_cp_stride are usually compile time constants in the gcn + * + * Hull shaders can probably also read output control points corresponding to other threads, like + * shared memory (but we haven't seen this yet). + * ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens. + * They should also be able to read PatchConst values, + * although not sure if this happens in practice. + * + * To determine which type of attribute (input, output, patchconst), we check the users of + * TessConstants V# reads to deduce which type of attribute a given load/store to LDS + * is touching. + * + * In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control + * point id (InvocationId) are packed in VGPR1 by the driver like + * V1 = InvocationId << 8 | PatchIdInVgt + * The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine + * which is which. + * + * This pass does not attempt to deduce the exact attribute referenced in a LDS load/store. + * Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's, + * TES in's, and PatchConst in/out's. 
+ * + * TCS/TES Input attributes: + * We define input attributes using an array in the shader roughly like this: + * // equivalent GLSL in TCS + * layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES]; + * + * Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#. + * We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes. + * For TES, the number of attributes comes from hs_cp_stride / 16. + * The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input + * patch (for Hull) or the hs output patch (for Domain). + * + * For input reads in TCS or TES, we emit SPIR-V like: + * float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2]; + * + * For output writes, we assume the control point index is InvocationId, since high level languages + * impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks + * like this: + * layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES]; + * out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value; + * + * NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs + * variable. + * + * Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole + * which contains the attributes from many patches. On the other hand, higher level shading + * languages restrict attribute access to the patch of the current thread, which is naturally a + * restriction in SPIR-V also. + * The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many + * patches' attributes are just implementation details of the ps4 driver/compiler. To deal with + * this, we can replace certain TessConstant V# reads with 0, which only contribute to the base + * address of the current patch's attributes in LDS and not the indexes within the local patch. 
+ * + * (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing + * attributes to buffers and not caring about whether they are hs input, hs output, or patchconst + * attributes) + * + */ + +namespace { + +using namespace Shader::Optimiation::PatternMatching; + +static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset, + Shader::Info& info, Shader::RuntimeInfo& runtime_info, + TessellationDataConstantBuffer& tess_constants) { + info.tess_consts_ptr_base = sharp_ptr_base; + info.tess_consts_dword_offset = sharp_dword_offset; + info.ReadTessConstantBuffer(tess_constants); + if (info.l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + + return; +} + +struct TessSharpLocation { + IR::ScalarReg ptr_base; + u32 dword_off; +}; + +std::optional FindTessConstantSharp(IR::Inst* read_const_buffer) { + IR::Value sharp_ptr_base; + IR::Value sharp_dword_offset; + + IR::Value rv = IR::Value{read_const_buffer}; + IR::Value handle = read_const_buffer->Arg(0); + + if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(), + MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = IR::ScalarReg::Max, + .dword_off = static_cast(sharp_dword_offset.ScalarReg())}; + } else if (M_COMPOSITECONSTRUCTU32X4( + M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)), + MatchIgnore()), + MatchImm(sharp_dword_offset)), + MatchIgnore(), MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(), + .dword_off = sharp_dword_offset.U32()}; + } + return {}; +} + +// Walker that helps deduce what type of attribute a DS instruction is reading +// or writing, which could be an input control point, output control point, +// or per-patch constant (PatchConst). 
+// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users +// recursively and increment a counter on the Load/WriteShared users. +// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase), +// and PatchConstBase (m_patchConstBase). +// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch +// is used as an addend to skip the region for input control points, and similarly +// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region +// for output control points. +// +// TODO: this will break if AMD compiler used distributive property like +// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch) +class TessConstantUseWalker { +public: + void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) { + u32 inc; + switch (attr) { + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + inc = 1; + break; + case TessConstantAttribute::PatchConstBase: + inc = 2; + break; + default: + UNREACHABLE(); + } + + for (IR::Use use : read_const_buffer->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + + ++seq_num; + } + +private: + void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) { + IR::Inst* inst = use.user; + + switch (use.user->GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + u32 counter = inst->Flags(); + inst->SetFlags(counter + inc); + // Stop here + return; + } + case IR::Opcode::Phi: { + struct PhiCounter { + u16 seq_num; + u8 unique_edge; + u8 counter; + }; + + PhiCounter count = inst->Flags(); + ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand); + // the point of seq_num is to tell us if we've already traversed this + // phi on the current walk. 
Alternatively we could keep a set of phi's + // seen on the current walk. This is to handle phi cycles + if (count.seq_num == 0) { + // First time we've encountered this phi + count.seq_num = seq_num; + // Mark the phi as having been traversed originally through this edge + count.unique_edge = use.operand; + count.counter = inc; + } else if (count.seq_num < seq_num) { + count.seq_num = seq_num; + // For now, assume we are visiting this phi via the same edge + // as on other walks. If not, some dataflow analysis might be necessary + ASSERT(count.unique_edge == use.operand); + count.counter += inc; + } else { + // count.seq_num == seq_num + // there's a cycle, and we've already been here on this walk + return; + } + inst->SetFlags(count); + break; + } + default: + break; + } + + for (IR::Use use : inst->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + } + + u32 seq_num{1u}; +}; + +enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst }; + +static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info, + const Shader::RuntimeInfo& runtime_info) { + u32 count = ring_access->Flags(); + if (count == 0) { + return AttributeRegion::InputCP; + } else if (info.l_stage == LogicalStage::TessellationControl && + runtime_info.hs_info.IsPassthrough()) { + ASSERT(count <= 1); + return AttributeRegion::PatchConst; + } else { + ASSERT(count <= 2); + return AttributeRegion(count); + } +} + +static bool IsDivisibleByStride(IR::Value term, u32 stride) { + IR::Value a, b; + if (MatchU32(stride).Match(term)) { + return true; + } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) || + M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) { + return IsDivisibleByStride(a, stride); + } else if (M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) { + return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride); + } + return false; +} + +// Return true if we can eliminate any 
addends +static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector& addends) { + IR::Value a, b; + if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) { + bool ret = false; + ret = TryOptimizeAddendInModulo(a, stride, addends); + ret |= TryOptimizeAddendInModulo(b, stride, addends); + return ret; + } else if (!IsDivisibleByStride(addend, stride)) { + addends.push_back(IR::U32{addend}); + return false; + } else { + return true; + } +} + +// In calculation (a + b + ...) % stride +// Use this fact +// (a + b) mod N = (a mod N + b mod N) mod N +// If any addend is divisible by stride, then we can replace it with 0 in the attribute +// or component index calculation +static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) { + std::vector addends; + if (TryOptimizeAddendInModulo(addr, stride, addends)) { + addr = ir.Imm32(0); + for (auto& addend : addends) { + addr = ir.IAdd(addr, addend); + } + } + return addr; +} + +// TODO: can optimize div in control point index similarly to mod + +// Read a TCS input (InputCP region) or TES input (OutputCP region) +static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride)); + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = + ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = + ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); +} + +} // namespace + +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + const Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: { + const auto info = inst.Flags(); + if (!info.globally_coherent) { + break; + } + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto GetValue = [&](IR::Value data) -> IR::F32 { + if (auto* inst = data.TryInstRecursive(); + inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) { + return IR::F32{inst->Arg(0)}; + } + return ir.BitCast(IR::U32{data}); + }; + const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1; + IR::U32 index = IR::U32{inst.Arg(1)}; + ASSERT(index.IsImmediate()); + const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2; + + const IR::Value data = inst.Arg(2); + auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch { + // The hull outputs tess factors in different formats depending on the shader. + // For triangle domains, it seems to pack the entries into 4 consecutive floats, + // with the 3 edge factors followed by the 1 interior factor. + // For quads, it does 4 edge factors then 2 interior. + // There is a tess factor stride member of the GNMX hull constants struct in + // a hull program shader binary archive, but this doesn't seem to be + // communicated to the driver. + // The layout seems to be implied by the type of the abstract domain. 
+ switch (runtime_info.hs_info.tess_type) { + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Triangle: + ASSERT(gcn_factor_idx < 4); + if (gcn_factor_idx == 3) { + return IR::Patch::TessellationLodInteriorU; + } + return IR::PatchFactor(gcn_factor_idx); + default: + // Point domain types haven't been seen so far + UNREACHABLE_MSG("Unhandled tess type"); + } + }; + + inst.Invalidate(); + if (num_dwords == 1) { + ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data)); + break; + } + auto* inst = data.TryInstRecursive(); + ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4)); + for (s32 i = 0; i < num_dwords; i++) { + ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i))); + } + break; + } + + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 + ? 1 + : (opcode == IR::Opcode::WriteSharedU64 ? 
2 : 4); + const IR::U32 addr{inst.Arg(0)}; + const IR::U32 data{inst.Arg(1).Resolve()}; + + const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind, + u32 off_dw) { + const IR::F32 data_component = ir.BitCast(value); + + if (output_kind == AttributeRegion::OutputCP) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + u32 stride = runtime_info.hs_info.hs_output_cp_stride; + // Invocation ID array index is implicit, handled by SPIRV backend + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = ir.ShiftRightLogical( + ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = ir.ShiftRightLogical( + ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + ir.SetTcsGenericAttribute(data_component, attr_index, comp_index); + } else { + ASSERT(output_kind == AttributeRegion::PatchConst); + ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}", + fmt::ptr(addr.Inst())); + ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component); + } + }; + + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + if (num_dwords == 1) { + SetOutput(addr, data, region, 0); + } else { + for (auto i = 0; i < num_dwords; i++) { + SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i); + } + } + inst.Invalidate(); + break; + } + + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + ASSERT_MSG(region == AttributeRegion::InputCP, + "Unhandled read of output or patchconst attribute in hull shader"); + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast( + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + + default: + break; + } + } + } + + if (runtime_info.hs_info.IsPassthrough()) { + // Copy input attributes to output attributes, indexed by InvocationID + // Passthrough should imply that input and output patches have same number of vertices + IR::Block* entry_block = *program.blocks.begin(); + auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Prologue; + }); + ASSERT(it != entry_block->end()); + ++it; + ASSERT(it != entry_block->end()); + ++it; + // Prologue + // SetExec #true + // <- insert here + // ... 
+ IR::IREmitter ir{*entry_block, it}; + + ASSERT(runtime_info.hs_info.ls_stride % 16 == 0); + u32 num_attributes = runtime_info.hs_info.ls_stride / 16; + const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId); + for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) { + for (u32 comp = 0; comp < 4; comp++) { + IR::F32 attr_read = + ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp)); + // InvocationId is implicit index for output control point writes + ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp)); + } + } + // We could wrap the rest of the program in an if stmt + // CopyInputAttrsToOutputs(); // psuedocode + // if (InvocationId == 0) { + // PatchConstFunction(); + // } + // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally + } +} + +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto opcode = inst.GetOpcode(); + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { + if (region == AttributeRegion::OutputCP) { + return ReadTessInputComponent( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + } else { + ASSERT(region == AttributeRegion::PatchConst); + return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); + } + }; + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast(GetInput(addr, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = GetInput(addr, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + default: + break; + } + } + } +} + +// Run before either hull or domain transform +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { + TessellationDataConstantBuffer tess_constants; + Shader::Info& info = program.info; + // Find the TessellationDataConstantBuffer V# + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + auto found_tess_consts_sharp = [&]() -> bool { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::Value addr = inst.Arg(0); + auto read_const_buffer = IR::BreadthFirstSearch( + addr, [](IR::Inst* maybe_tess_const) -> std::optional { + if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) { + return maybe_tess_const; + } + return std::nullopt; + }); + if (read_const_buffer) { + auto sharp_location = FindTessConstantSharp(read_const_buffer.value()); + if (sharp_location) { + if (info.tess_consts_dword_offset >= 0) { + // Its possible theres a readconstbuffer that contributes to an + // LDS address and isnt a TessConstant V# 
read. Could improve on + // this somehow + ASSERT_MSG(static_cast(sharp_location->dword_off) == + info.tess_consts_dword_offset && + sharp_location->ptr_base == + info.tess_consts_ptr_base, + "TessConstants V# is ambiguous"); + } + InitTessConstants(sharp_location->ptr_base, + static_cast(sharp_location->dword_off), info, + runtime_info, tess_constants); + return true; + } + UNREACHABLE_MSG("Failed to match tess constant sharp"); + } + return false; + } + default: + return false; + } + }(); + + if (found_tess_consts_sharp) { + break; + } + } + } + + ASSERT(info.tess_consts_dword_offset >= 0); + + TessConstantUseWalker walker; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + auto sharp_location = FindTessConstantSharp(&inst); + if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && + sharp_location->dword_off == info.tess_consts_dword_offset) { + // The shader is reading from the TessConstants V# + IR::Value index = inst.Arg(1); + + ASSERT_MSG(index.IsImmediate(), + "Tessellation constant read with dynamic index"); + u32 off_dw = index.U32(); + ASSERT(off_dw <= + static_cast(TessConstantAttribute::FirstEdgeTessFactorIndex)); + + auto tess_const_attr = static_cast(off_dw); + switch (tess_const_attr) { + case TessConstantAttribute::LsStride: + // If not, we may need to make this runtime state for TES + ASSERT(info.l_stage == LogicalStage::TessellationControl); + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride)); + break; + case TessConstantAttribute::HsCpStride: + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride)); + break; + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + case TessConstantAttribute::PatchConstBase: + walker.MarkTessAttributeUsers(&inst, tess_const_attr); + // We should be able to safely set these to 0 so that indexing happens only + // within the 
local patch in the recompiled Vulkan shader. This assumes + // these values only contribute to address calculations for in/out + // attributes in the original gcn shader. + // See the explanation for why we set V2 to 0 when emitting the prologue. + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + break; + case Shader::TessConstantAttribute::PatchConstSize: + case Shader::TessConstantAttribute::PatchOutputSize: + case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold: + case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex: + // May need to replace PatchConstSize and PatchOutputSize with 0 + break; + default: + UNREACHABLE_MSG("Read past end of TessConstantsBuffer"); + } + } + } + } + } + + // These pattern matching are neccessary for now unless we support dynamic indexing of + // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single + // vec4 array like in/out attrs. Not sure about tess factors. + if (info.l_stage == LogicalStage::TessellationControl) { + // Replace the BFEs on V1 (packed with patch id within VGT and output cp id) + for (IR::Block* block : program.blocks) { + for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) { + IR::Inst& inst = *it; + if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(0), MatchU32(8)) + .Match(IR::Value{&inst})) { + IR::IREmitter emit(*block, it); + // This is the patch id within the VGT, not the actual PrimitiveId + // in the draw + IR::Value replacement(0u); + inst.ReplaceUsesWithAndRemove(replacement); + } else if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32( + MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(8), MatchU32(5)) + .Match(IR::Value{&inst})) { + IR::IREmitter ir(*block, it); + IR::Value replacement; + if (runtime_info.hs_info.IsPassthrough()) { + // Deal with annoying pattern in BB where InvocationID use makes 
no + // sense (in addr calculation for patchconst or tess factor write) + replacement = ir.Imm32(0); + } else { + replacement = ir.GetAttributeU32(IR::Attribute::InvocationId); + } + inst.ReplaceUsesWithAndRemove(replacement); + } + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7bd47992c..61f43e7e4 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage); +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index eb1be2967..d6f1efb12 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Optimization { @@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }; switch (stage) { + case Stage::Local: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + + u32 offset = 0; + const auto* addr = inst.Arg(0).InstRecursive(); + if (addr->GetOpcode() == IR::Opcode::IAdd32) { + ASSERT(addr->Arg(1).IsImmediate()); + offset = addr->Arg(1).U32(); + } + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { + const auto attrib = IR::Attribute::Param0 + (offset / 16); + const auto comp = (offset / 4) % 4; + const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data}; + ir.SetAttribute(attrib, ir.BitCast(value), comp); + offset += 4; + } + inst.Invalidate(); + break; + } + default: + break; + } + }); + break; + } case Stage::Export: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::LoadBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } const auto shl_inst = inst.Arg(1).TryInstRecursive(); - const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; + const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2); const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); const auto bucket = offset.Resolve().U32() / 256u; const auto attrib = bucket < 4 ? 
IR::Attribute::Position0 @@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8b93d72e3..c34b59b88 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::GetUserData: info.ud_mask.Set(inst.Arg(0).ScalarReg()); break; + case IR::Opcode::SetPatch: { + const auto patch = inst.Arg(0).Patch(); + if (patch <= IR::Patch::TessellationLodBottom) { + info.stores_tess_level_outer = true; + } else if (patch <= IR::Patch::TessellationLodInteriorV) { + info.stores_tess_level_inner = true; + } else { + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + } + break; + } + case IR::Opcode::GetPatch: { + const auto patch = inst.Arg(0).Patch(); + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + break; + } case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/patch.cpp b/src/shader_recompiler/ir/patch.cpp new file mode 100644 index 000000000..2485bc5b4 --- /dev/null +++ b/src/shader_recompiler/ir/patch.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/patch.h" + +namespace Shader::IR { + +std::string NameOf(Patch patch) { + switch (patch) { + case Patch::TessellationLodLeft: + return "TessellationLodLeft"; + case Patch::TessellationLodTop: + return "TessellationLodTop"; + case Patch::TessellationLodRight: + return 
"TessellationLodRight"; + case Patch::TessellationLodBottom: + return "TessellationLodBottom"; + case Patch::TessellationLodInteriorU: + return "TessellationLodInteriorU"; + case Patch::TessellationLodInteriorV: + return "TessellationLodInteriorV"; + default: + const u32 index = u32(patch) - u32(Patch::Component0); + return fmt::format("Component{}", index); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/patch.h b/src/shader_recompiler/ir/patch.h new file mode 100644 index 000000000..65d2192e6 --- /dev/null +++ b/src/shader_recompiler/ir/patch.h @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + 
Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 125); + +constexpr bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +constexpr Patch PatchFactor(u32 index) { + return static_cast(index); +} + +constexpr Patch PatchGeneric(u32 index) { + return static_cast(static_cast(Patch::Component0) + index); +} + +constexpr u32 GenericPatchIndex(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +constexpr u32 GenericPatchElement(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +[[nodiscard]] std::string NameOf(Patch patch); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Patch patch, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch)); + } +}; diff --git a/src/shader_recompiler/ir/pattern_matching.h b/src/shader_recompiler/ir/pattern_matching.h new file mode 100644 index 000000000..1279f14c3 --- /dev/null +++ b/src/shader_recompiler/ir/pattern_matching.h @@ -0,0 +1,127 @@ +// 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::Optimiation::PatternMatching { + +// Attempt at pattern matching for Insts and Values +// Needs improvement, mostly a convenience + +template +struct MatchObject { + inline bool Match(IR::Value v) { + return static_cast(this)->Match(v); + } +}; + +struct MatchValue : MatchObject { + MatchValue(IR::Value& return_val_) : return_val(return_val_) {} + + inline bool Match(IR::Value v) { + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchIgnore : MatchObject { + MatchIgnore() {} + + inline bool Match(IR::Value v) { + return true; + } +}; + +struct MatchImm : MatchObject { + MatchImm(IR::Value& v) : return_val(v) {} + + inline bool Match(IR::Value v) { + if (!v.IsImmediate()) { + return false; + } + + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchAttribute : MatchObject { + MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {} + + inline bool Match(IR::Value v) { + return v.Type() == IR::Type::Attribute && v.Attribute() == attribute; + } + +private: + IR::Attribute attribute; +}; + +struct MatchU32 : MatchObject { + MatchU32(u32 imm_) : imm(imm_) {} + + inline bool Match(IR::Value v) { + return v.IsImmediate() && v.Type() == IR::Type::U32 && v.U32() == imm; + } + +private: + u32 imm; +}; + +template +struct MatchInstObject : MatchObject> { + static_assert(sizeof...(Args) == IR::NumArgsOf(opcode)); + MatchInstObject(Args&&... 
args) : pattern(std::forward_as_tuple(args...)) {} + + inline bool Match(IR::Value v) { + IR::Inst* inst = v.TryInstRecursive(); + if (!inst || inst->GetOpcode() != opcode) { + return false; + } + + bool matched = true; + + [&](std::index_sequence) { + ((matched = matched && std::get(pattern).Match(inst->Arg(Is))), ...); + }(std::make_index_sequence{}); + + return matched; + } + +private: + using MatchArgs = std::tuple; + MatchArgs pattern; +}; + +template +inline auto MakeInstPattern(Args&&... args) { + return MatchInstObject(std::forward(args)...); +} + +// Conveniences. TODO probably simpler way of doing this +#define M_READCONST(...) MakeInstPattern(__VA_ARGS__) +#define M_GETUSERDATA(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDUEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDSEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_GETATTRIBUTEU32(...) MakeInstPattern(__VA_ARGS__) +#define M_UMOD32(...) MakeInstPattern(__VA_ARGS__) +#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern(__VA_ARGS__) +#define M_IADD32(...) MakeInstPattern(__VA_ARGS__) +#define M_IMUL32(...) MakeInstPattern(__VA_ARGS__) +#define M_BITWISEAND32(...) MakeInstPattern(__VA_ARGS__) +#define M_GETTESSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_SETTCSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X2(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X4(...) 
\ + MakeInstPattern(__VA_ARGS__) + +} // namespace Shader::Optimiation::PatternMatching \ No newline at end of file diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index ca2e9ceb9..19e0da3dd 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -49,7 +49,8 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; - BitField<14, 1, u32> ring_access; // global + system coherency + BitField<14, 1, u32> system_coherent; + BitField<15, 1, u32> globally_coherent; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h index ec855a77e..0f043fb64 100644 --- a/src/shader_recompiler/ir/type.h +++ b/src/shader_recompiler/ir/type.h @@ -15,7 +15,7 @@ enum class Type { ScalarReg = 1 << 1, VectorReg = 1 << 2, Attribute = 1 << 3, - SystemValue = 1 << 4, + Patch = 1 << 4, U1 = 1 << 5, U8 = 1 << 6, U16 = 1 << 7, diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index 889e99556..8826b80f2 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index dbe8b5cc4..ed1e5536a 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -16,6 +16,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/patch.h" #include 
"shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -34,6 +35,7 @@ public: explicit Value(IR::ScalarReg reg) noexcept; explicit Value(IR::VectorReg reg) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch patch) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -56,6 +58,7 @@ public: [[nodiscard]] IR::ScalarReg ScalarReg() const; [[nodiscard]] IR::VectorReg VectorReg() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -75,6 +78,7 @@ private: IR::ScalarReg sreg; IR::VectorReg vreg; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; @@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const { return attribute; } +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + inline bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 64f842c42..ad57adb6a 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile) { + RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is 
expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] != token_mov_vcchi) { @@ -60,12 +63,29 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + const auto stage = program.info.stage; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + Shader::Optimization::IdentityRemovalPass(program.blocks); + if (info.l_stage == LogicalStage::TessellationControl) { + // Tess passes require previous const prop passes for now (for simplicity). TODO allow + // fine grained folding or opportunistic folding we set an operand to an immediate + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::HullShaderTransform(program, runtime_info); + } else if (info.l_stage == LogicalStage::TessellationEval) { + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::DomainShaderTransform(program, runtime_info); + } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - if (program.info.stage != Stage::Compute) { + Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } - Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); diff --git 
a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index f8acf6c9e..8180c29b3 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,6 @@ struct Pools { }; [[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile); + RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4c779a368..23e23c118 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -7,6 +7,7 @@ #include #include #include "common/types.h" +#include "shader_recompiler/frontend/tessellation.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/types.h" @@ -21,12 +22,31 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 7; + +// Vertex intentionally comes after TCS/TES due to order of compilation +enum class LogicalStage : u32 { + Fragment, + TessellationControl, + TessellationEval, + Vertex, + Geometry, + Compute, + NumLogicalStages +}; + +constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } +struct LocalRuntimeInfo { + u32 ls_stride; + bool links_with_tcs; + + auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; +}; + struct ExportRuntimeInfo { u32 vertex_data_size; @@ -64,9 +84,57 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + // Domain + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; + u32 hs_output_cp_stride{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one; + 
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + tess_type == other.tess_type && tess_topology == other.tess_topology && + tess_partitioning == other.tess_partitioning && + hs_output_cp_stride == other.hs_output_cp_stride; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + hs_output_cp_stride = tess_constants.hs_cp_stride; + } +}; + +struct HullRuntimeInfo { + // from registers + u32 num_input_control_points; + u32 num_threads; + AmdGpu::TessellationType tess_type; + + // from tess constants buffer + u32 ls_stride; + u32 hs_output_cp_stride; + u32 hs_output_base; + + auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + + // It might be possible for a non-passthrough TCS to have these conditions, in some + // dumb situation. + // In that case, it should be fine to assume passthrough and declare some extra + // output control points and attributes that shouldn't be read by the TES anyways + bool IsPassthrough() const { + return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; + }; + + // regs.ls_hs_config.hs_output_control_points contains the number of threads, which + // isn't exactly the number of output control points. + // For passthrough shaders, the register field is set to 1, so use the number of + // input control points + u32 NumOutputControlPoints() const { + return IsPassthrough() ?
num_input_control_points : num_threads; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + ls_stride = tess_constants.ls_stride; + hs_output_cp_stride = tess_constants.hs_cp_stride; + hs_output_base = tess_constants.hs_output_base; } }; @@ -150,8 +218,10 @@ struct RuntimeInfo { AmdGpu::FpDenormMode fp_denorm_mode32; AmdGpu::FpRoundMode fp_round_mode32; union { + LocalRuntimeInfo ls_info; ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + HullRuntimeInfo hs_info; GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -174,6 +244,10 @@ struct RuntimeInfo { return es_info == other.es_info; case Stage::Geometry: return gs_info == other.gs_info; + case Stage::Hull: + return hs_info == other.hs_info; + case Stage::Local: + return ls_info == other.ls_info; default: return true; } diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 9b5dd8fa1..5799c4c95 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -127,6 +127,18 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { spec.force_unnormalized = sharp.force_unnormalized; }); + + // Initialize runtime_info fields that rely on analysis in tessellation passes + if (info->l_stage == LogicalStage::TessellationControl || + info->l_stage == LogicalStage::TessellationEval) { + Shader::TessellationDataConstantBuffer tess_constants; + info->ReadTessConstantBuffer(tess_constants); + if (info->l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + } } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9bc3454d8..b6172d37b 100644 --- a/src/video_core/amdgpu/liverpool.h +++ 
b/src/video_core/amdgpu/liverpool.h @@ -143,6 +143,13 @@ struct Liverpool { } }; + struct HsTessFactorClamp { + // I've only seen min=0.0, max=1.0 so far. + // TODO why is max set to 1.0? Makes no sense + float hs_max_tess; + float hs_min_tess; + }; + struct ComputeProgram { u32 dispatch_initiator; u32 dim_x; @@ -956,6 +963,7 @@ struct Liverpool { enum VgtStages : u32 { Vs = 0u, // always enabled EsGs = 0xB0u, + LsHs = 0x45u, }; VgtStages raw; @@ -963,7 +971,8 @@ struct Liverpool { BitField<2, 1, u32> hs_en; BitField<3, 2, u32> es_en; BitField<5, 1, u32> gs_en; - BitField<6, 1, u32> vs_en; + BitField<6, 2, u32> vs_en; + BitField<8, 1, u32> dynamic_hs; bool IsStageEnabled(u32 stage) const { switch (stage) { @@ -1059,6 +1068,28 @@ struct Liverpool { }; }; + union LsHsConfig { + u32 raw; + BitField<0, 8, u32> num_patches; + BitField<8, 6, u32> hs_input_control_points; + BitField<14, 6, u32> hs_output_control_points; + }; + + union TessellationConfig { + u32 raw; + BitField<0, 2, TessellationType> type; + BitField<2, 3, TessellationPartitioning> partitioning; + BitField<5, 3, TessellationTopology> topology; + }; + + union TessFactorMemoryBase { + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1109,7 +1140,7 @@ struct Liverpool { ShaderProgram es_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram hs_program; - INSERT_PADDING_WORDS(0x2C); + INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; @@ -1176,7 +1207,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + HsTessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; INSERT_PADDING_WORDS(1); ModeControl mode_control; @@ -1200,9 +1233,10 @@ struct 
Liverpool { BitField<0, 11, u32> vgt_gs_max_vert_out; INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(1); + LsHsConfig ls_hs_config; u32 vgt_gs_vert_itemsize[4]; - INSERT_PADDING_WORDS(4); + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); PolygonOffset poly_offset; GsInstances vgt_gs_instance_cnt; StreamOutConfig vgt_strmout_config; @@ -1216,6 +1250,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1431,6 +1467,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); @@ -1445,6 +1482,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); @@ -1456,6 +1494,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); 
+static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 6b95ed910..fa8491665 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -21,6 +23,69 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -118,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + +template 
<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8d495ab06..a39b18378 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipelineKey compute_key_, const Shader::Info& info_, vk::ShaderModule module) : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { - auto& info = stages[int(Shader::Stage::Compute)]; + auto& info = stages[int(Shader::LogicalStage::Compute)]; info = &info_; const vk::PipelineShaderStageCreateInfo shader_ci = { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 795537574..222ffb5a9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/scope_exit.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include 
"video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; if (fetch_shader && !instance.IsVertexInputDynamicState()) { - const auto& vs_info = GetStage(Shader::Stage::Vertex); + const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader @@ -106,6 +107,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than -1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + .patchControlPoints = key.patch_control_points, + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, @@ -204,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector shader_stages; - auto stage = u32(Shader::Stage::Vertex); + auto stage = u32(Shader::LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -212,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Geometry); + stage = u32(Shader::LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -220,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Fragment); + stage = 
u32(Shader::LogicalStage::TessellationControl); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationControl, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::TessellationEval); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, @@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + stages[u32(Shader::LogicalStage::TessellationControl)] ? 
&tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, @@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() { if (!stage) { continue; } - if (stage->has_readconst) { bindings.push_back({ .binding = binding++, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 703a0680e..444c8517e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -52,6 +52,7 @@ struct GraphicsPipelineKey { std::array blend_controls; std::array write_masks; std::array vertex_buffer_formats; + u32 patch_control_points; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; @@ -73,7 +74,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index e844150b2..76efb215d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -327,6 +327,7 @@ bool Instance::CreateDevice() { .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, + .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, @@ -378,6 +379,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{ .extendedDynamicState = true, }, + vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{}, 
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ff27b742f..58473496f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -22,6 +22,8 @@ extern std::unique_ptr presenter; namespace Vulkan { +using Shader::LogicalStage; +using Shader::Stage; using Shader::VsOutput; constexpr static std::array DescriptorHeapSizes = { @@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); } -Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { +Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; const auto BuildCommon = [&](const auto& program) { @@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { info.fp_round_mode32 = program.settings.fp_round_mode32; }; switch (stage) { - case Shader::Stage::Export: { + case Stage::Local: { + BuildCommon(regs.ls_program); + if (regs.stage_enable.IsStageEnabled(static_cast(Stage::Hull))) { + info.ls_info.links_with_tcs = true; + Shader::TessellationDataConstantBuffer tess_constants; + const auto* pgm = regs.ProgramForStage(static_cast(Stage::Hull)); + const auto params = Liverpool::GetParams(*pgm); + const auto& hull_info = program_cache.at(params.hash)->info; + hull_info.ReadTessConstantBuffer(tess_constants); + info.ls_info.ls_stride = tess_constants.ls_stride; + } + break; + } + case Stage::Hull: { + BuildCommon(regs.hs_program); + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.tess_type = 
regs.tess_config.type; + + // We need to initialize most hs_info fields after finding the V# with tess constants + break; + } + case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } - case Shader::Stage::Vertex: { + case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + if (l_stage == LogicalStage::TessellationEval) { + info.vs_info.tess_type = regs.tess_config.type; + info.vs_info.tess_topology = regs.tess_config.topology; + info.vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } - case Shader::Stage::Geometry: { + case Stage::Geometry: { BuildCommon(regs.gs_program); auto& gs_info = info.gs_info; gs_info.output_vertices = regs.vgt_gs_max_vert_out; @@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } - case Shader::Stage::Fragment: { + case Stage::Fragment: { BuildCommon(regs.ps_program); info.fs_info.en_flags = regs.ps_input_ena; info.fs_info.addr_flags = regs.ps_input_addr; @@ -143,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } break; } - case Shader::Stage::Compute: { + case Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; @@ -277,6 +306,11 @@ bool PipelineCache::RefreshGraphicsKey() { key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); key.vertex_buffer_formats.fill(vk::Format::eUndefined); + key.patch_control_points = 0; + if (regs.stage_enable.hs_en.Value()) { + key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + 
} + // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader // recompiler. for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { @@ -305,7 +339,7 @@ bool PipelineCache::RefreshGraphicsKey() { fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -332,23 +366,23 @@ bool PipelineCache::RefreshGraphicsKey() { auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, - key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, stage_out, params, binding); if (fetch_shader_) { fetch_shader = fetch_shader_; } return true; }; - const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; - const auto& IsGsFeaturesSupported = [&]() -> bool { // These checks are temporary until all functionality is implemented. return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; }; - TryBindStage(Shader::Stage::Fragment); + infos.fill(nullptr); + TryBindStage(Stage::Fragment, LogicalStage::Fragment); - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? 
fs_info->mrt_mask : 0u; switch (regs.stage_enable.raw) { @@ -356,22 +390,36 @@ bool PipelineCache::RefreshGraphicsKey() { if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { return false; } - if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { + if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { return false; } - if (!TryBindStage(Shader::Stage::Geometry)) { + if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + } + case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + if (!instance.IsTessellationSupported()) { + break; + } + if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { + return false; + } + if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { return false; } break; } default: { - TryBindStage(Shader::Stage::Vertex); - infos[static_cast(Shader::Stage::Geometry)] = nullptr; + TryBindStage(Stage::Vertex, LogicalStage::Vertex); break; } } - const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + const auto vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { @@ -424,19 +472,18 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} // namespace Vulkan bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = - GetProgram(Shader::Stage::Compute, cs_params, binding); + GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; } -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, - const Shader::RuntimeInfo& runtime_info, 
+vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, @@ -461,19 +508,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { - DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span{}, - is_patched); + DebugState.CollectShader(name, info.l_stage, module, spv, code, + patch ? *patch : std::span{}, is_patched); } return module; } -std::tuple, u64> -PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding) { - const auto runtime_info = BuildRuntimeInfo(stage); +PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, + Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { + auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - it_pgm.value() = std::make_unique(stage, params); + it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); @@ -492,7 +539,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = Shader::Info(stage, params); + auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c5c2fc98e..ec4406448 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -34,11 +34,13 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; + using ModuleList = boost::container::small_vector; Shader::Info info; - boost::container::small_vector modules; + ModuleList modules; - explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) + : info{stage, l_stage, params} {} void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); @@ -55,10 +57,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple, - u64> - GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding); + using Result = std::tuple, u64>; + Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, + Shader::ShaderParams params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -71,10 +73,10 @@ private: std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding); - Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); + Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h 
b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 8c48c83f7..1b13a1797 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -14,9 +14,10 @@ class BufferCache; namespace Vulkan { -static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment; +static constexpr auto gp_stage_flags = + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl | + vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; class Instance; class Scheduler; @@ -37,6 +38,7 @@ public: } auto GetStages() const { + static_assert(static_cast(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1); if (is_compute) { return std::span{stages.cend() - 1, stages.cend()}; } else { @@ -44,7 +46,7 @@ public: } } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept { return *stages[u32(stage)]; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index eb2ef3600..fef4c7ec5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -48,10 +49,6 @@ void Rasterizer::CpSync() { bool Rasterizer::FilterDraw() { const auto& regs = liverpool->regs; - // Tessellation is unsupported so skip the draw to avoid locking up the driver. - if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { - return false; - } // There are several cases (e.g. 
FCE, FMask/HTile decompression) where we don't need to do an // actual draw hence can skip pipeline creation. if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { @@ -214,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -271,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -326,7 +323,7 @@ void Rasterizer::DispatchDirect() { return; } - const auto& cs = pipeline->GetStage(Shader::Stage::Compute); + const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { return; } @@ -387,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { const auto& regs = liverpool->regs; if (pipeline->IsCompute()) { - const auto& info = pipeline->GetStage(Shader::Stage::Compute); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); // Most of the time when a metadata is updated with a shader it gets cleared. It means // we can skip the whole dispatch and update the tracked state instead. 
Also, it is not From cafd40f2c2f2d0062979ad1ec12b6d755eeb4e81 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sat, 14 Dec 2024 15:33:06 +0300 Subject: [PATCH 08/31] DmaData and Recompiler fixes (#1775) * liverpool: fix dmadata packet handling * recompiler: emit a label right after s_branch to prevent dead code interferrence * specialize barriers --- .../frontend/control_flow_graph.cpp | 1 + src/video_core/amdgpu/liverpool.cpp | 26 ++-- src/video_core/buffer_cache/buffer_cache.cpp | 124 +++++++++++++++++- src/video_core/buffer_cache/buffer_cache.h | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 4 + .../renderer_vulkan/vk_rasterizer.h | 1 + 6 files changed, 140 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 8c3122b28..1fb129f6c 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,6 +80,7 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); + AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8db2d63c4..820903ab7 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, - dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { 
rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), - dma_data->NumBytes(), false); + rasterizer->CopyBuffer(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), + 
dma_data->NumBytes(), false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e9fc06493..31b2a2c58 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -312,8 +312,23 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + const vk::BufferMemoryBarrier2 buf_barrier_before = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &buf_barrier_before, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + const vk::BufferMemoryBarrier2 buf_barrier_after = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, @@ -324,9 +339,96 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &buf_barrier_after, + }); +} + +void 
BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { + if (!src_gds && !IsRegionRegistered(src, num_bytes)) { + // Both buffers were not transferred to GPU yet. Can safely copy in host memory. + memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); + return; + } + // Without a readback there's nothing we can do with this + // Fallback to creating dst buffer on GPU to at least have this data there + } + if (!src_gds && !IsRegionRegistered(src, num_bytes)) { + InlineData(dst, std::bit_cast(src), num_bytes, dst_gds); + return; + } + auto& src_buffer = [&] -> const Buffer& { + if (src_gds) { + return gds_buffer; + } + const BufferId buffer_id = FindBuffer(src, num_bytes); + return slot_buffers[buffer_id]; + }(); + auto& dst_buffer = [&] -> const Buffer& { + if (dst_gds) { + return gds_buffer; + } + const BufferId buffer_id = FindBuffer(dst, num_bytes); + return slot_buffers[buffer_id]; + }(); + vk::BufferCopy region{ + .srcOffset = src_buffer.Offset(src), + .dstOffset = dst_buffer.Offset(dst), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = 
vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_before, + }); + cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); + const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_after, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -701,8 +803,22 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + 
vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e62913413..4c57e9c29 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -87,6 +87,7 @@ public: /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); [[nodiscard]] std::pair ObtainHostUBO(std::span data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fef4c7ec5..9e9b40ca5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -840,6 +840,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } +void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); +} + u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ec1b5e134..b5bead697 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -53,6 +53,7 @@ public: void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); void InlineData(VAddr address, const void* value, u32 
num_bytes, bool is_gds); + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size); From e752f04cde25941818e141cc933ffa380cc321e5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 04:33:24 -0800 Subject: [PATCH 09/31] shader_recompiler: Fixups from stencil changes (#1776) --- src/shader_recompiler/frontend/translate/export.cpp | 2 +- src/video_core/texture_cache/image_view.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index f4914577d..5927aa696 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -13,7 +13,7 @@ void Translator::EmitExport(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; - if (attrib == IR::Attribute::Depth && exp.en != 1) { + if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) { LOG_WARNING(Render_Vulkan, "Unsupported depth export"); return; } diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 41c45019e..ec1fda0d8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -131,7 +131,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } - if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) { + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && + (format == vk::Format::eR8Uint || format == vk::Format::eR8Unorm)) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eStencil; } From 
27447537c3e846e0da923fdb1525e4253b555849 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 06:12:41 -0800 Subject: [PATCH 10/31] externals: Update sirit to fix debug assert (#1783) --- externals/sirit | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/sirit b/externals/sirit index 5b5ff49a5..1e74f4ef8 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 5b5ff49a58f5be27af1058794c6ca907dabc05b3 +Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 8da9280d0..e5d4f3077 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -247,7 +247,7 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id ImageOperands operands; if (ctx.profile.supports_image_load_store_lod) { operands.Add(spv::ImageOperandsMask::Lod, lod); - } else if (lod.value != 0) { + } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, From e9ede8d62749d2697c0b807296846d132acf4919 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:17:14 +0200 Subject: [PATCH 11/31] Revert "DmaData and Recompiler fixes (#1775)" (#1784) This reverts commit cafd40f2c2f2d0062979ad1ec12b6d755eeb4e81. 
--- .../frontend/control_flow_graph.cpp | 1 - src/video_core/amdgpu/liverpool.cpp | 26 ++-- src/video_core/buffer_cache/buffer_cache.cpp | 124 +----------------- src/video_core/buffer_cache/buffer_cache.h | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 4 - .../renderer_vulkan/vk_rasterizer.h | 1 - 6 files changed, 17 insertions(+), 140 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 1fb129f6c..8c3122b28 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,7 +80,6 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); - AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 820903ab7..8db2d63c4 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, + dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && 
dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), + dma_data->NumBytes(), false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), - dma_data->NumBytes(), false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 31b2a2c58..e9fc06493 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp 
@@ -312,23 +312,8 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier_before = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer->Handle(), - .offset = buffer->Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_before, - }); - cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); - const vk::BufferMemoryBarrier2 buf_barrier_after = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, @@ -339,96 +324,9 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_after, - }); -} - -void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - // Both buffers were not transferred to GPU yet. Can safely copy in host memory. 
- memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); - return; - } - // Without a readback there's nothing we can do with this - // Fallback to creating dst buffer on GPU to at least have this data there - } - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - InlineData(dst, std::bit_cast(src), num_bytes, dst_gds); - return; - } - auto& src_buffer = [&] -> const Buffer& { - if (src_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(src, num_bytes); - return slot_buffers[buffer_id]; - }(); - auto& dst_buffer = [&] -> const Buffer& { - if (dst_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(dst, num_bytes); - return slot_buffers[buffer_id]; - }(); - vk::BufferCopy region{ - .srcOffset = src_buffer.Offset(src), - .dstOffset = dst_buffer.Offset(dst), - .size = num_bytes, - }; - const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferRead, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_before, - }); - cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); - const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { - { - .srcStageMask = 
vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_after, + .pBufferMemoryBarriers = &buf_barrier, }); + cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -803,22 +701,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, - }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, 
- vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4c57e9c29..e62913413 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -87,7 +87,6 @@ public: /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); [[nodiscard]] std::pair ObtainHostUBO(std::span data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9e9b40ca5..fef4c7ec5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -840,10 +840,6 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } -void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); -} - u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b5bead697..ec1b5e134 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -53,7 +53,6 @@ public: void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size); From 
8b88344679af4a45e7d5e35089e778b463eac13b Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:46:19 -0800 Subject: [PATCH 12/31] vk_instance: Remove unused dynamic state 2 features struct (#1791) --- src/video_core/renderer_vulkan/vk_instance.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 76efb215d..d7bfaee4e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -379,7 +379,6 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{ .extendedDynamicState = true, }, - vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{}, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, From f93677b95371a83db695151341d4629e133d2203 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:46:35 -0800 Subject: [PATCH 13/31] resource_tracking_pass: Fix converting dimensions to float for normalization. (#1790) --- .../ir/passes/resource_tracking_pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f436db07a..a59398952 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -586,12 +586,13 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, const auto dimensions = unnormalized ? 
ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) : IR::Value{}; - const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value { - const auto coord = get_addr_reg(idx); + const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { + const auto coord = get_addr_reg(coord_idx); if (unnormalized) { // Normalize the coordinate for sampling, dividing by its corresponding dimension. - return ir.FPDiv(coord, - ir.BitCast(IR::U32{ir.CompositeExtract(dimensions, dim_idx)})); + const auto dim = + ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)}); + return ir.FPDiv(coord, dim); } return coord; }; From 876445faf1b0ef63ddb9d0111e35b74bd31b4a42 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sat, 14 Dec 2024 23:46:55 +0300 Subject: [PATCH 14/31] recompiler: emit a label right after s_branch to prevent dead code interferrence (#1785) --- src/shader_recompiler/frontend/control_flow_graph.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 8c3122b28..0816ec088 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,6 +80,8 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); + // Emit this label so that the block ends with s_branch instruction + AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; From af26c945b10c400f9720dbb29857876867a57c35 Mon Sep 17 00:00:00 2001 From: Connor Garey Date: Sat, 14 Dec 2024 22:30:17 +0000 Subject: [PATCH 15/31] Fix for "shadPS4" not being given on Linux volume mixers (#1789) --- src/sdl_window.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 
f6b57436f..4b13844b8 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include @@ -68,6 +69,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3 WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_, std::string_view window_title) : width{width_}, height{height_}, controller{controller_} { + if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) { + UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError()); + } if (!SDL_Init(SDL_INIT_VIDEO)) { UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError()); } From 0fd1ab674bbb6b41f5cc6d46ca5ed4bcd6d6052c Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 14 Dec 2024 23:54:46 +0100 Subject: [PATCH 16/31] GPU processor refactoring (#1787) * coroutine code prettification * asc queues submission refactoring * better asc ring context handling * final touches and review notes * even more simplification for context saving --- src/common/debug.h | 3 + src/core/debug_state.cpp | 68 +++--- src/core/debug_state.h | 8 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 36 ++-- .../backend/spirv/spirv_emit_context.cpp | 2 +- src/video_core/amdgpu/liverpool.cpp | 204 ++++++++++-------- src/video_core/amdgpu/liverpool.h | 25 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 9 +- .../renderer_vulkan/vk_rasterizer.cpp | 6 +- .../renderer_vulkan/vk_shader_hle.cpp | 16 +- .../renderer_vulkan/vk_shader_hle.h | 2 +- src/video_core/texture_cache/tile_manager.cpp | 1 + 12 files changed, 234 insertions(+), 146 deletions(-) diff --git a/src/common/debug.h b/src/common/debug.h index 091c6191d..4d42aa4ab 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -57,3 +57,6 @@ enum MarkersPalette : int { tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0}; #define FRAME_END FrameMark + +#define 
FIBER_ENTER(name) TracyFiberEnter(name) +#define FIBER_EXIT TracyFiberLeave diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index c68fd469d..daf614bd9 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -142,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) { frame.queues.push_back(std::move(dump)); } -void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute) { - std::scoped_lock lock{frame_dump_list_mutex}; +std::optional DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) { const auto it = waiting_reg_dumps.find(header_addr); if (it == waiting_reg_dumps.end()) { - return; + return std::nullopt; } auto& frame = *it->second; waiting_reg_dumps.erase(it); waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr)); - auto& dump = frame.regs[header_addr - base_addr]; - dump.regs = regs; - if (is_compute) { - dump.is_compute = true; - const auto& cs = dump.regs.cs_program; - dump.cs_data = PipelineComputerProgramDump{ - .cs_program = cs, - .code = std::vector{cs.Code().begin(), cs.Code().end()}, - }; - } else { - for (int i = 0; i < RegDump::MaxShaderStages; i++) { - if (regs.stage_enable.IsStageEnabled(i)) { - auto stage = regs.ProgramForStage(i); - if (stage->address_lo != 0) { - auto code = stage->Code(); - dump.stages[i] = PipelineShaderProgramDump{ - .user_data = *stage, - .code = std::vector{code.begin(), code.end()}, - }; - } + return &frame.regs[header_addr - base_addr]; +} + +void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, + const AmdGpu::Liverpool::Regs& regs) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->regs = regs; + + for (int i = 0; i < RegDump::MaxShaderStages; i++) { + if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { + auto stage = (*dump)->regs.ProgramForStage(i); + if 
(stage->address_lo != 0) { + auto code = stage->Code(); + (*dump)->stages[i] = PipelineShaderProgramDump{ + .user_data = *stage, + .code = std::vector{code.begin(), code.end()}, + }; } } } } +void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, + const CsState& cs_state) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->is_compute = true; + auto& cs = (*dump)->regs.cs_program; + cs = cs_state; + + (*dump)->cs_data = PipelineComputerProgramDump{ + .cs_program = cs, + .code = std::vector{cs.Code().begin(), cs.Code().end()}, + }; +} + void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, diff --git a/src/core/debug_state.h b/src/core/debug_state.h index 0db5bc468..a0e428b6b 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -11,7 +11,6 @@ #include #include "common/types.h" -#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #ifdef _WIN32 @@ -204,12 +203,17 @@ public: void PushQueueDump(QueueDump dump); void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); + const AmdGpu::Liverpool::Regs& regs); + using CsState = AmdGpu::Liverpool::ComputeProgram; + void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); void CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); + +private: + std::optional GetRegDump(uintptr_t base_addr, uintptr_t header_addr); }; } // namespace DebugStateType diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index e85b8b890..583339dd9 100644 --- 
a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61); // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; std::condition_variable cv_lock{}; -static std::mutex m_submission{}; +std::mutex m_submission{}; static u64 frames_submitted{}; // frame counter static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame static int sdk_version{0}; -struct AscQueueInfo { - VAddr map_addr; - u32* read_addr; - u32 ring_size_dw; -}; -static Common::SlotVector asc_queues{}; +static u32 asc_next_offs_dw[Liverpool::NumComputeRings]; static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF; static constexpr u32 tessellation_offchip_buffer_size = 0x800000u; @@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { } auto vqid = gnm_vqid - 1; - auto& asc_queue = asc_queues[{vqid}]; - const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr + *asc_queue.read_addr); - const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr - : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr; - const std::span acb_span{acb_ptr, acb_size >> 2u}; + auto& asc_queue = liverpool->asc_queues[{vqid}]; + + const auto& offs_dw = asc_next_offs_dw[vqid]; + + if (next_offs_dw < offs_dw) { + ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer"); + } + + const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr) + offs_dw; + const auto acb_size_dw = (next_offs_dw ? 
next_offs_dw : asc_queue.ring_size_dw) - offs_dw; + const std::span acb_span{acb_ptr, acb_size_dw}; + + asc_next_offs_dw[vqid] = next_offs_dw; if (DebugState.DumpingCurrentFrame()) { static auto last_frame_num = -1LL; @@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { }); } liverpool->SubmitAsc(gnm_vqid, acb_span); - - *asc_queue.read_addr += acb_size; - *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; } void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) { @@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR; } - auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw); + const auto vqid = + liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id); // We need to offset index as `dingDong` assumes it to be from the range [1..64] const auto gnm_vqid = vqid.index + 1; LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id, gnm_vqid); + const auto& queue = liverpool->asc_queues[vqid]; + *queue.read_addr = 0u; + return gnm_vqid; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2e09e70a7..5f0ad298e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -294,7 +294,7 @@ void EmitContext::DefineInputs() { }); // Note that we pass index rather than Id input_params[attrib.semantic] = SpirvAttribute{ - .id = rate_idx, + .id = {rate_idx}, .pointer_type = input_u32, .component_type = U32[1], .num_components = std::min(attrib.num_elements, num_components), diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8db2d63c4..8cca636c0 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ 
b/src/video_core/amdgpu/liverpool.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/assert.h" #include "common/config.h" #include "common/debug.h" @@ -18,7 +20,32 @@ namespace AmdGpu { static const char* dcb_task_name{"DCB_TASK"}; static const char* ccb_task_name{"CCB_TASK"}; -static const char* acb_task_name{"ACB_TASK"}; + +#define MAX_NAMES 56 +static_assert(Liverpool::NumComputeRings <= MAX_NAMES); + +#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n), +#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)} + +static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES); + +#define YIELD(name) \ + FIBER_EXIT; \ + co_yield {}; \ + FIBER_ENTER(name); + +#define YIELD_CE() YIELD(ccb_task_name) +#define YIELD_GFX() YIELD(dcb_task_name) +#define YIELD_ASC(id) YIELD(acb_task_name[id]) + +#define RESUME(task, name) \ + FIBER_EXIT; \ + task.handle.resume(); \ + FIBER_ENTER(name); + +#define RESUME_CE(task) RESUME(task, ccb_task_name) +#define RESUME_GFX(task) RESUME(task, dcb_task_name) +#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id]) std::array Liverpool::ConstantEngine::constants_heap; @@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) { VideoCore::StartCapture(); - int qid = -1; + curr_qid = -1; while (num_submits || num_commands) { @@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) { --num_commands; } - qid = (qid + 1) % NumTotalQueues; + curr_qid = (curr_qid + 1) % num_mapped_queues; - auto& queue = mapped_queues[qid]; + auto& queue = mapped_queues[curr_qid]; Task::Handle task{}; { @@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) { } Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { - TracyFiberEnter(ccb_task_name); + FIBER_ENTER(ccb_task_name); while (!ccb.empty()) { const auto* header = reinterpret_cast(ccb.data()); @@ -155,9 +182,7 @@ 
Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { case PM4ItOpcode::WaitOnDeCounterDiff: { const auto diff = it_body[0]; while ((cblock.de_count - cblock.ce_count) >= diff) { - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); + YIELD_CE(); } break; } @@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { const auto* indirect_buffer = reinterpret_cast(header); auto task = ProcessCeUpdate({indirect_buffer->Address(), indirect_buffer->ib_size}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_CE(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(ccb_task_name); - }; + while (!task.handle.done()) { + YIELD_CE(); + RESUME_CE(task); + } break; } default: @@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { ccb = NextPacket(ccb, header->type3.NumWords() + 1); } - TracyFiberLeave; + FIBER_EXIT; } Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span ccb) { - TracyFiberEnter(dcb_task_name); + FIBER_ENTER(dcb_task_name); cblock.Reset(); @@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(dcb.data()); @@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[GfxQueueId].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::SetUconfigReg: { @@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - regs.cs_program.dim_x 
= dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); rasterizer->DispatchDirect(); @@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + auto& cs_program = GetCsRegs(); const auto offset = dispatch_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, - true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DispatchIndirect", cmd_address)); @@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); while (!rewind->Valid()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = 
mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -633,11 +663,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanWaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = mapped_queues[GfxQueueId].cs_state; + YIELD_GFX(); } break; } @@ -645,13 +671,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); auto task = ProcessGraphics( {indirect_buffer->Address(), indirect_buffer->ib_size}, {}); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_GFX(task); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - }; + while (!task.handle.done()) { + YIELD_GFX(); + RESUME_GFX(task); + } break; } case PM4ItOpcode::IncrementDeCounter: { @@ -660,9 +685,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span acb, int vqid) { - TracyFiberEnter(acb_task_name); +template +Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { + FIBER_ENTER(acb_task_name[vqid]); + const auto& queue = asc_queues[{vqid}]; auto base_addr = reinterpret_cast(acb.data()); while (!acb.empty()) { @@ -711,15 +736,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = ProcessCompute( + auto task = ProcessCompute( {indirect_buffer->Address(), indirect_buffer->ib_size}, vqid); - while (!task.handle.done()) { - task.handle.resume(); + RESUME_ASC(task, vqid); - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - }; + while (!task.handle.done()) { + YIELD_ASC(vqid); + RESUME_ASC(task, vqid); + } break; } case PM4ItOpcode::DmaData: { @@ -757,30 +781,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { case PM4ItOpcode::Rewind: { const 
PM4CmdRewind* rewind = reinterpret_cast(header); while (!rewind->Valid()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::SetShReg: { const auto* set_data = reinterpret_cast(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[vqid + 1].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); - regs.cs_program.dim_x = dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + auto& cs_program = GetCsRegs(); + cs_program.dim_x = dispatch_direct->dim_x; + cs_program.dim_y = dispatch_direct->dim_y; + cs_program.dim_z = dispatch_direct->dim_z; + cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs, true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->DispatchDirect(); @@ -803,17 +835,13 @@ 
Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { - mapped_queues[vqid].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(acb_task_name); - regs.cs_program = mapped_queues[vqid].cs_state; + YIELD_ASC(vqid); } break; } case PM4ItOpcode::ReleaseMem: { const auto* release_mem = reinterpret_cast(header); - release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <--- + release_mem->SignalFence(static_cast(queue.pipe_id)); break; } default: @@ -821,10 +849,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { static_cast(opcode), count); } - acb = NextPacket(acb, header->type3.NumWords() + 1); + const auto packet_size_dw = header->type3.NumWords() + 1; + acb = NextPacket(acb, packet_size_dw); + + if constexpr (!is_indirect) { + *queue.read_addr += packet_size_dw; + *queue.read_addr %= queue.ring_size_dw; + } } - TracyFiberLeave; + FIBER_EXIT; } std::pair, std::span> Liverpool::CopyCmdBuffers( @@ -881,10 +915,11 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { submit_cv.notify_one(); } -void Liverpool::SubmitAsc(u32 vqid, std::span acb) { - ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); - auto& queue = mapped_queues[vqid]; +void Liverpool::SubmitAsc(u32 gnm_vqid, std::span acb) { + ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index"); + auto& queue = mapped_queues[gnm_vqid]; + const auto vqid = gnm_vqid - 1; const auto& task = ProcessCompute(acb, vqid); { std::scoped_lock lock{queue.m_access}; @@ -892,6 +927,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { } std::scoped_lock lk{submit_mutex}; + num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1); ++num_submits; submit_cv.notify_one(); } diff --git a/src/video_core/amdgpu/liverpool.h 
b/src/video_core/amdgpu/liverpool.h index b6172d37b..4c74d37d0 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -16,6 +16,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" +#include "common/slot_vector.h" #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" @@ -45,7 +46,8 @@ struct Liverpool { static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; - static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe); + static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe; + static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings; static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs static constexpr u32 NumColorBuffers = 8; @@ -1143,7 +1145,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); - ComputeProgram cs_program; + ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues` INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; INSERT_PADDING_WORDS(1); @@ -1298,7 +1300,7 @@ public: ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb); - void SubmitAsc(u32 vqid, std::span acb); + void SubmitAsc(u32 gnm_vqid, std::span acb); void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; @@ -1341,6 +1343,18 @@ public: gfx_queue.dcb_buffer.reserve(GfxReservedSize); } + inline ComputeProgram& GetCsRegs() { + return mapped_queues[curr_qid].cs_state; + } + + struct AscQueueInfo { + VAddr map_addr; + u32* read_addr; + u32 ring_size_dw; + u32 pipe_id; + }; + Common::SlotVector asc_queues{}; + private: struct Task { struct promise_type { @@ -1378,7 +1392,8 @@ 
private: std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); - Task ProcessCompute(std::span acb, int vqid); + template + Task ProcessCompute(std::span acb, u32 vqid); void Process(std::stop_token stoken); @@ -1393,6 +1408,7 @@ private: VAddr indirect_args_addr{}; }; std::array mapped_queues{}; + u32 num_mapped_queues{1u}; // GFX is always available struct ConstantEngine { void Reset() { @@ -1421,6 +1437,7 @@ private: std::mutex submit_mutex; std::condition_variable_any submit_cv; std::queue> command_queue{}; + int curr_qid{-1}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 58473496f..50396287b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -173,9 +173,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ break; } case Stage::Compute: { - const auto& cs_pgm = regs.cs_program; + const auto& cs_pgm = liverpool->GetCsRegs(); info.num_user_data = cs_pgm.settings.num_user_regs; - info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; + info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4; info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), @@ -476,8 +476,8 @@ bool PipelineCache::RefreshGraphicsKey() { bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; - const auto* cs_pgm = &liverpool->regs.cs_program; - const auto cs_params = Liverpool::GetParams(*cs_pgm); + const auto& cs_pgm = liverpool->GetCsRegs(); + const auto cs_params = Liverpool::GetParams(cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, 
binding); return true; @@ -529,6 +529,7 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); } + it_pgm.value()->info.user_data = params.user_data; auto& program = it_pgm.value(); auto& info = program->info; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fef4c7ec5..bd8906f86 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -317,14 +317,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; } const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); - if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { + if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) { return; } @@ -344,7 +344,7 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; - const auto& cs_program = liverpool->regs.cs_program; + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index b863dce21..ff78f5d24 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -2,17 +2,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include 
"video_core/renderer_vulkan/vk_shader_hle.h" -#include "vk_rasterizer.h" +extern std::unique_ptr liverpool; namespace Vulkan { static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; -bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { +static bool ExecuteCopyShaderHLE(const Shader::Info& info, + const AmdGpu::Liverpool::ComputeProgram& cs_program, + Rasterizer& rasterizer) { auto& scheduler = rasterizer.GetScheduler(); auto& buffer_cache = rasterizer.GetBufferCache(); @@ -34,9 +36,9 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg static std::vector copies; copies.clear(); - copies.reserve(regs.cs_program.dim_x); + copies.reserve(cs_program.dim_x); - for (u32 i = 0; i < regs.cs_program.dim_x; i++) { + for (u32 i = 0; i < cs_program.dim_x; i++) { const auto& [dst_idx, src_idx, end] = ctl_buf[i]; const u32 local_dst_offset = dst_idx * buf_stride; const u32 local_src_offset = src_idx * buf_stride; @@ -122,10 +124,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg } bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer) { + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { switch (info.pgm_hash) { case COPY_SHADER_HASH: - return ExecuteCopyShaderHLE(info, regs, rasterizer); + return ExecuteCopyShaderHLE(info, cs_program, rasterizer); default: return false; } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h index fda9b1735..008de8003 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.h +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -15,6 +15,6 @@ class Rasterizer; /// Attempts to execute a shader using HLE if possible. 
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - Rasterizer& rasterizer); + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); } // namespace Vulkan diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d8d23c400..a5e09e45d 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -212,6 +212,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: case vk::Format::eBc6HUfloatBlock: + case vk::Format::eR32G32B32A32Uint: case vk::Format::eR32G32B32A32Sfloat: return vk::Format::eR32G32B32A32Uint; default: From d2ac92481b514bd2dacbd60abdc460cd076d9d3c Mon Sep 17 00:00:00 2001 From: Richard Habitzreuter Date: Sun, 15 Dec 2024 11:28:36 -0300 Subject: [PATCH 17/31] style: add Gruvbox theme (#1796) --- src/qt_gui/main_window.cpp | 13 +++++++++++++ src/qt_gui/main_window_themes.cpp | 22 ++++++++++++++++++++-- src/qt_gui/main_window_themes.h | 8 +------- src/qt_gui/main_window_ui.h | 6 ++++++ 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 3eb629c0b..0b5137c4b 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -111,6 +111,7 @@ void MainWindow::CreateActions() { m_theme_act_group->addAction(ui->setThemeGreen); m_theme_act_group->addAction(ui->setThemeBlue); m_theme_act_group->addAction(ui->setThemeViolet); + m_theme_act_group->addAction(ui->setThemeGruvbox); } void MainWindow::AddUiWidgets() { @@ -540,6 +541,14 @@ void MainWindow::CreateConnects() { isIconBlack = false; } }); + connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() { + m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar); + Config::setMainWindowTheme(static_cast(Theme::Gruvbox)); + if (isIconBlack) { + SetUiIcons(false); + 
isIconBlack = false; + } + }); } void MainWindow::StartGame() { @@ -912,6 +921,10 @@ void MainWindow::SetLastUsedTheme() { ui->setThemeViolet->setChecked(true); isIconBlack = false; SetUiIcons(false); + case Theme::Gruvbox: + ui->setThemeGruvbox->setChecked(true); + isIconBlack = false; + SetUiIcons(false); break; } } diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 35e64ef74..65dd04269 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -15,7 +15,6 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Window, QColor(50, 50, 50)); themePalette.setColor(QPalette::WindowText, Qt::white); themePalette.setColor(QPalette::Base, QColor(20, 20, 20)); - themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25)); themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53)); themePalette.setColor(QPalette::ToolTipBase, Qt::white); themePalette.setColor(QPalette::ToolTipText, Qt::white); @@ -28,7 +27,6 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; - case Theme::Light: mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background "color: #000000;" // Black text @@ -115,6 +113,26 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text + qApp->setPalette(themePalette); + break; + case Theme::Gruvbox: + mw_searchbar->setStyleSheet("background-color: #1d2021;" + "color: #f9f5d7;" + "border: 2px solid #f9f5d7;" + "padding: 5px;"); + themePalette.setColor(QPalette::Window, QColor(29, 32, 33)); + themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215)); + 
themePalette.setColor(QPalette::Base, QColor(29, 32, 33)); + themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47)); + themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215)); + themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Text, QColor(249, 245, 215)); + themePalette.setColor(QPalette::Button, QColor(40, 40, 40)); + themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215)); + themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52)); + themePalette.setColor(QPalette::Link, QColor(131, 165, 152)); + themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152)); + themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; } diff --git a/src/qt_gui/main_window_themes.h b/src/qt_gui/main_window_themes.h index 6da70e995..d162da87b 100644 --- a/src/qt_gui/main_window_themes.h +++ b/src/qt_gui/main_window_themes.h @@ -7,13 +7,7 @@ #include #include -enum class Theme : int { - Dark, - Light, - Green, - Blue, - Violet, -}; +enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox }; class WindowThemes : public QObject { Q_OBJECT diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 5ff572f86..df64361fd 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -36,6 +36,7 @@ public: QAction* setThemeGreen; QAction* setThemeBlue; QAction* setThemeViolet; + QAction* setThemeGruvbox; QWidget* centralWidget; QLineEdit* mw_searchbar; QPushButton* playButton; @@ -158,6 +159,9 @@ public: setThemeViolet = new QAction(MainWindow); setThemeViolet->setObjectName("setThemeViolet"); setThemeViolet->setCheckable(true); + setThemeGruvbox = new QAction(MainWindow); + setThemeGruvbox->setObjectName("setThemeGruvbox"); + setThemeGruvbox->setCheckable(true); centralWidget = new QWidget(MainWindow); centralWidget->setObjectName("centralWidget"); 
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth()); @@ -282,6 +286,7 @@ public: menuThemes->addAction(setThemeGreen); menuThemes->addAction(setThemeBlue); menuThemes->addAction(setThemeViolet); + menuThemes->addAction(setThemeGruvbox); menuGame_List_Icons->addAction(setIconSizeTinyAct); menuGame_List_Icons->addAction(setIconSizeSmallAct); menuGame_List_Icons->addAction(setIconSizeMediumAct); @@ -368,6 +373,7 @@ public: setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr)); setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr)); setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr)); + setThemeGruvbox->setText("Gruvbox"); toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr)); } // retranslateUi }; From 9aa1c13c7e20079716a55657e1d47bda1aafd3ff Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sun, 15 Dec 2024 06:30:19 -0800 Subject: [PATCH 18/31] Fix some compiler problems with ds3 (#1793) - Implement S_CMOVK_I32 - Handle Isoline abstract patch type --- .../frontend/translate/scalar_alu.cpp | 20 +++++++++++-------- .../frontend/translate/translate.h | 2 +- .../ir/passes/hull_shader_transform.cpp | 10 ++++++---- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 1ef0d82d8..e731e299a 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -161,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) { switch (inst.opcode) { // SOPK case Opcode::S_MOVK_I32: - return S_MOVK(inst); - + return S_MOVK(inst, false); + case Opcode::S_CMOVK_I32: + return S_MOVK(inst, true); case Opcode::S_CMPK_EQ_I32: return S_CMPK(ConditionOp::EQ, true, inst); case Opcode::S_CMPK_LG_I32: @@ -458,13 +459,16 @@ void 
Translator::S_ABSDIFF_I32(const GcnInst& inst) { // SOPK -void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm; - if (simm16 & (1 << 15)) { - // TODO: need to verify the case of imm sign extension - UNREACHABLE(); +void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) { + const s16 simm16 = inst.control.sopk.simm; + // do the sign extension + const s32 simm32 = static_cast(simm16); + IR::U32 val = ir.Imm32(simm32); + if (is_conditional) { + // if !SCC its a NOP + val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))}; } - SetDst(inst.dst[0], ir.Imm32(simm16)); + SetDst(inst.dst[0], val); } void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 60bad1864..8e575fcad 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -100,7 +100,7 @@ public: void S_NOT_B32(const GcnInst& inst); // SOPK - void S_MOVK(const GcnInst& inst); + void S_MOVK(const GcnInst& inst, bool is_conditional); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst); diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 5cf02b6d0..895c9823e 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -398,8 +398,8 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { // communicated to the driver. // The layout seems to be implied by the type of the abstract domain. 
switch (runtime_info.hs_info.tess_type) { - case AmdGpu::TessellationType::Quad: - ASSERT(gcn_factor_idx < 6); + case AmdGpu::TessellationType::Isoline: + ASSERT(gcn_factor_idx < 2); return IR::PatchFactor(gcn_factor_idx); case AmdGpu::TessellationType::Triangle: ASSERT(gcn_factor_idx < 4); @@ -407,9 +407,11 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { return IR::Patch::TessellationLodInteriorU; } return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); default: - // Point domain types haven't been seen so far - UNREACHABLE_MSG("Unhandled tess type"); + UNREACHABLE(); } }; From 3001b007f6a450f62526fa61724753ab532bef20 Mon Sep 17 00:00:00 2001 From: DanielSvoboda Date: Sun, 15 Dec 2024 11:30:53 -0300 Subject: [PATCH 19/31] Keybord on README / Fix Play Time (#1786) * Keybord on README F10_F11_F12 * Update game_list_frame.cpp --- README.md | 7 +++++++ src/qt_gui/game_list_frame.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 18e69546c..7ef5bdf65 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o # Keyboard mapping +| Button | Function | +|-------------|-------------| +F10 | FPS Counter +Ctrl+F10 | Video Debug Info +F11 | Fullscreen +F12 | Trigger RenderDoc Capture + > [!NOTE] > Xbox and DualShock controllers work out of the box. 
diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 47bfbfef9..63f6b63b8 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -123,7 +123,7 @@ void GameListFrame::PopulateGameList() { formattedPlayTime = formattedPlayTime.trimmed(); m_game_info->m_games[i].play_time = playTime.toStdString(); if (formattedPlayTime.isEmpty()) { - SetTableItem(i, 7, "0"); + SetTableItem(i, 7, QString("%1s").arg(seconds)); } else { SetTableItem(i, 7, formattedPlayTime); } From e7c4ffe032a0dd5605322363b9766425ea5531f9 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 15 Dec 2024 20:53:29 +0100 Subject: [PATCH 20/31] hot-fix: Tracy operation restored; memory leak fix as a bonus --- externals/CMakeLists.txt | 2 +- src/common/debug.h | 7 +++++++ src/core/libraries/gnmdriver/gnmdriver.cpp | 3 +++ src/video_core/amdgpu/liverpool.cpp | 3 ++- src/video_core/renderer_vulkan/vk_instance.cpp | 4 +++- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 1ab23a403..dbe6794d8 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -193,7 +193,7 @@ option(TRACY_ENABLE "" ON) option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash option(TRACY_ON_DEMAND "" ON) option(TRACY_NO_FRAME_IMAGE "" ON) -option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling +option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability option(TRACY_NO_SYSTEM_TRACING "" ON) option(TRACY_NO_CALLSTACK "" ON) option(TRACY_NO_CODE_TRANSFER "" ON) diff --git a/src/common/debug.h b/src/common/debug.h index 4d42aa4ab..882e9e5c4 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() { return tracy::GetProfiler().IsConnected(); } +#define TRACY_GPU_ENABLED 0 + #define CUSTOM_LOCK(type, varname) \ tracy::LockableCtx varname { \ []() -> const 
tracy::SourceLocationData* { \ @@ -58,5 +60,10 @@ enum MarkersPalette : int { #define FRAME_END FrameMark +#ifdef TRACY_FIBERS #define FIBER_ENTER(name) TracyFiberEnter(name) #define FIBER_EXIT TracyFiberLeave +#else +#define FIBER_ENTER(name) +#define FIBER_EXIT +#endif diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 583339dd9..1a6007bf8 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -488,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() { } void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw); if (gnm_vqid == 0) { @@ -2166,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes) { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { @@ -2258,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ } int PS4_SYSV_ABI sceGnmSubmitDone() { + HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); WaitGpuIdle(); if (!liverpool->IsGpuIdle()) { diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8cca636c0..5dd3edd6d 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -659,7 +659,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); - if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + if (vo_port->IsVoLabel(wait_addr) && + num_submits == mapped_queues[GfxQueueId].submits.size()) { vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d7bfaee4e..b479c1464 100644 --- 
a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/config.h" +#include "common/debug.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -261,7 +262,8 @@ bool Instance::CreateDevice() { // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool calibrated_timestamps = + TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); From 8a4e03228aca87fd31da6730e60723ece601a1c5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 15 Dec 2024 16:11:15 -0800 Subject: [PATCH 21/31] spirv_emit_context: Prevent double-add of GS in attributes to interface. 
(#1800) --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5f0ad298e..5d2ec6f96 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -381,9 +381,8 @@ void EmitContext::DefineInputs() { for (int param_id = 0; param_id < num_params; ++param_id) { const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; - Name(id, fmt::format("in_attr{}", param_id)); + Name(id, fmt::format("gs_in_attr{}", param_id)); input_params[param_id] = {id, input_f32, F32[1], 4}; - interfaces.push_back(id); } break; } From 5585e42677d3f00c71679fe20aab1446f9a5bca3 Mon Sep 17 00:00:00 2001 From: Richard Habitzreuter Date: Tue, 17 Dec 2024 07:32:30 -0300 Subject: [PATCH 22/31] style: add rounded borders and focus color styling to the search bar (#1804) --- src/qt_gui/main_window.cpp | 1 + src/qt_gui/main_window_themes.cpp | 64 +++++++++++++++++-------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 0b5137c4b..9c81bcf11 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -921,6 +921,7 @@ void MainWindow::SetLastUsedTheme() { ui->setThemeViolet->setChecked(true); isIconBlack = false; SetUiIcons(false); + break; case Theme::Gruvbox: ui->setThemeGruvbox->setChecked(true); isIconBlack = false; diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index 65dd04269..a52b4466e 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -8,10 +8,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { switch (theme) { case Theme::Dark: - mw_searchbar->setStyleSheet("background-color: 
#1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(50, 50, 50)); themePalette.setColor(QPalette::WindowText, Qt::white); themePalette.setColor(QPalette::Base, QColor(20, 20, 20)); @@ -28,10 +30,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; case Theme::Light: - mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background - "color: #000000;" // Black text - "border: 2px solid #000000;" // Black border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #ffffff; color: #000000; border: 1px solid #000000; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray themePalette.setColor(QPalette::WindowText, Qt::black); // Black themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish @@ -46,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::white); // White qApp->setPalette(themePalette); break; - case Theme::Green: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #192819; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green 
background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base @@ -66,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text - qApp->setPalette(themePalette); break; - case Theme::Blue: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base @@ -92,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; - case Theme::Violet: - mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background - "color: #ffffff;" // White text - "border: 2px solid #ffffff;" // White border - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #2A82DA; }"); themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background themePalette.setColor(QPalette::WindowText, Qt::white); // White text themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base @@ 
-116,10 +122,12 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { qApp->setPalette(themePalette); break; case Theme::Gruvbox: - mw_searchbar->setStyleSheet("background-color: #1d2021;" - "color: #f9f5d7;" - "border: 2px solid #f9f5d7;" - "padding: 5px;"); + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #83A598; }"); themePalette.setColor(QPalette::Window, QColor(29, 32, 33)); themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215)); themePalette.setColor(QPalette::Base, QColor(29, 32, 33)); From 3c8e25e8e48f5a7618d84d28a534e3530c538790 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 02:34:43 -0800 Subject: [PATCH 23/31] fs: Fix wrong mounts being matched by partial guest path. (#1809) --- src/core/file_sys/fs.cpp | 21 +++++++++++++++++---- src/core/file_sys/fs.h | 9 ++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 0fdbb2783..92f725cc7 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -10,16 +10,28 @@ namespace Core::FileSys { +std::string RemoveTrailingSlashes(const std::string& path) { + // Remove trailing slashes to make comparisons simpler. 
+ std::string path_sanitized = path; + while (path_sanitized.ends_with("/")) { + path_sanitized.pop_back(); + } + return path_sanitized; +} + void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder, bool read_only) { std::scoped_lock lock{m_mutex}; - m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only); } void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) { std::scoped_lock lock{m_mutex}; - auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), - [&](const MntPair& pair) { return pair.mount == guest_folder; }); + const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder); + auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) { + return pair.mount == guest_folder_sanitized; + }); m_mnt_pairs.erase(it, m_mnt_pairs.end()); } @@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea } // Nothing to do if getting the mount itself. 
- if (corrected_path == mount->mount) { + const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path); + if (corrected_path_sanitized == mount->mount) { return mount->host_path; } diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index b0153c162..e219887c8 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -22,7 +22,7 @@ class MntPoints { public: struct MntPair { std::filesystem::path host_path; - std::string mount; // e.g /app0/ + std::string mount; // e.g /app0 bool read_only; }; @@ -39,8 +39,11 @@ public: const MntPair* GetMount(const std::string& guest_path) { std::scoped_lock lock{m_mutex}; - const auto it = std::ranges::find_if( - m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); }); + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) { + // When doing starts-with check, add a trailing slash to make sure we don't match + // against only part of the mount path. + return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/"); + }); return it == m_mnt_pairs.end() ? 
nullptr : &*it; } From aa5c1c10dfa560d55c59da2d7f841c3e9e0485fc Mon Sep 17 00:00:00 2001 From: ElBread3 <92335081+ElBread3@users.noreply.github.com> Date: Tue, 17 Dec 2024 04:42:21 -0600 Subject: [PATCH 24/31] More Fixes for Separate Update (#1487) * handle getdents + fix condition + add info to description * fix not handling dents errors * to not overwrite it, only gather separate update entries when normal folder is done * fix always setting entries to 0 and guest name including "UPDATE" * reset indexes on completion * don't use concat, fixes long standing bug * make sce_module module loading take both paths into account --- src/core/file_sys/fs.cpp | 10 ++++ src/core/file_sys/fs.h | 10 ++++ src/core/libraries/kernel/file_system.cpp | 58 ++++++++++++++++++++++- src/emulator.cpp | 46 +++++++++++++----- src/qt_gui/gui_context_menus.h | 22 ++++----- src/qt_gui/translations/en.ts | 2 +- 6 files changed, 119 insertions(+), 29 deletions(-) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 92f725cc7..45ba67b93 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -199,4 +199,14 @@ void HandleTable::CreateStdHandles() { setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr } +int HandleTable::GetFileDescriptor(File* file) { + std::scoped_lock lock{m_mutex}; + auto it = std::find(m_files.begin(), m_files.end(), file); + + if (it != m_files.end()) { + return std::distance(m_files.begin(), it); + } + return 0; +} + } // namespace Core::FileSys diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index e219887c8..56df32ad0 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -9,6 +9,7 @@ #include #include #include "common/io_file.h" +#include "common/logging/formatter.h" #include "core/devices/base_device.h" namespace Core::FileSys { @@ -37,6 +38,14 @@ public: std::filesystem::path GetHostPath(std::string_view guest_directory, bool* is_read_only = nullptr); + const MntPair* 
GetMountFromHostPath(const std::string& host_path) { + std::scoped_lock lock{m_mutex}; + const auto it = std::ranges::find_if(m_mnt_pairs, [&](const MntPair& mount) { + return host_path.starts_with(std::string{fmt::UTF(mount.host_path.u8string()).data}); + }); + return it == m_mnt_pairs.end() ? nullptr : &*it; + } + const MntPair* GetMount(const std::string& guest_path) { std::scoped_lock lock{m_mutex}; const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) { @@ -86,6 +95,7 @@ public: void DeleteHandle(int d); File* GetFile(int d); File* GetFile(const std::filesystem::path& host_name); + int GetFileDescriptor(File* file); void CreateStdHandles(); diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 5ba9976c6..57efbb631 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { return sizeof(OrbisKernelDirent); } +static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) { + int dir_entries = 0; + + auto* h = Common::Singleton::Instance(); + auto* mnt = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data}; + auto mount = mnt->GetMountFromHostPath(update_dir_name); + auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data}; + + size_t pos = update_dir_name.find("-UPDATE"); + if (pos != std::string::npos) { + update_dir_name.erase(pos, 7); + auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1); + int descriptor; + + auto existent_folder = h->GetFile(update_dir_name); + if (!existent_folder) { + u32 handle = h->CreateHandle(); + auto* new_file = h->GetFile(handle); + new_file->type = Core::FileSys::FileType::Directory; + new_file->m_guest_name = guest_name; + new_file->m_host_name = update_dir_name; + if 
(!std::filesystem::is_directory(new_file->m_host_name)) { + h->DeleteHandle(handle); + return dir_entries; + } else { + new_file->dirents = GetDirectoryEntries(new_file->m_host_name); + new_file->dirents_index = 0; + } + new_file->is_opened = true; + descriptor = h->GetFileDescriptor(new_file); + } else { + descriptor = h->GetFileDescriptor(existent_folder); + } + + dir_entries = GetDents(descriptor, buf, nbytes, basep); + if (dir_entries == ORBIS_OK && existent_folder) { + existent_folder->dirents_index = 0; + file->dirents_index = 0; + } + } + + return dir_entries; +} + int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) { - return GetDents(fd, buf, nbytes, nullptr); + int a = GetDents(fd, buf, nbytes, nullptr); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr); + } + return a; } int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) { - return GetDents(fd, buf, nbytes, basep); + int a = GetDents(fd, buf, nbytes, basep); + if (a == ORBIS_OK) { + return HandleSeparateUpdateDents(fd, buf, nbytes, basep); + } + return a; } s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { diff --git a/src/emulator.cpp b/src/emulator.cpp index c517bc284..252a34418 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/config.h" @@ -106,9 +107,11 @@ Emulator::~Emulator() { void Emulator::Run(const std::filesystem::path& file) { // Use the eboot from the separated updates folder if it's there - std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE"); - bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys"); - std::filesystem::path eboot_path = use_game_patch ? 
game_patch_folder / file.filename() : file; + std::filesystem::path game_patch_folder = file.parent_path(); + game_patch_folder += "-UPDATE"; + std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename()) + ? game_patch_folder / file.filename() + : file; // Applications expect to be run from /app0 so mount the file's parent path as app0. auto* mnt = Common::Singleton::Instance(); @@ -226,20 +229,37 @@ void Emulator::Run(const std::filesystem::path& file) { LoadSystemModules(eboot_path, game_info.game_serial); // Load all prx from game's sce_module folder - std::filesystem::path sce_module_folder = file.parent_path() / "sce_module"; - if (std::filesystem::is_directory(sce_module_folder)) { - for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) { - std::filesystem::path module_path = entry.path(); - std::filesystem::path update_module_path = - eboot_path.parent_path() / "sce_module" / entry.path().filename(); - if (std::filesystem::exists(update_module_path) && use_game_patch) { - module_path = update_module_path; + std::vector modules_to_load; + std::filesystem::path game_module_folder = file.parent_path() / "sce_module"; + if (std::filesystem::is_directory(game_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) { + if (entry.is_regular_file()) { + modules_to_load.push_back(entry.path()); } - LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); - linker->LoadModule(module_path); } } + // Load all prx from separate update's sce_module folder + std::filesystem::path update_module_folder = game_patch_folder / "sce_module"; + if (std::filesystem::is_directory(update_module_folder)) { + for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) { + auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(), + [&entry](const std::filesystem::path& p) { + return p.filename() == entry.path().filename(); + }); + if 
(it != modules_to_load.end()) { + *it = entry.path(); + } else { + modules_to_load.push_back(entry.path()); + } + } + } + + for (const auto& module_path : modules_to_load) { + LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string())); + linker->LoadModule(module_path); + } + #ifdef ENABLE_DISCORD_RPC // Discord RPC if (Config::getEnableDiscordRPC()) { diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 6eef1230c..3cc12c11e 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -122,11 +122,11 @@ public: if (selected == &openSfoViewer) { PSF psf; - QString game_update_path; - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); std::filesystem::path game_folder_path = m_games[itemID].path; - if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { - game_folder_path = Common::FS::PathFromQString(game_update_path); + std::filesystem::path game_update_path = game_folder_path; + game_update_path += "UPDATE"; + if (std::filesystem::exists(game_update_path)) { + game_folder_path = game_update_path; } if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) { int rows = psf.GetEntries().size(); @@ -320,21 +320,17 @@ public: bool error = false; QString folder_path, game_update_path, dlc_path; Common::FS::PathToQString(folder_path, m_games[itemID].path); - Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE")); + game_update_path = folder_path + "-UPDATE"; Common::FS::PathToQString( dlc_path, Config::getAddonInstallDir() / Common::FS::PathFromQString(folder_path).parent_path().filename()); QString message_type = tr("Game"); if (selected == deleteUpdate) { - if (!Config::getSeparateUpdateEnabled()) { - QMessageBox::critical(nullptr, tr("Error"), - QString(tr("requiresEnableSeparateUpdateFolder_MSG"))); - error = true; - } else if (!std::filesystem::exists( - Common::FS::PathFromQString(game_update_path))) { - 
QMessageBox::critical(nullptr, tr("Error"), - QString(tr("This game has no update to delete!"))); + if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { + QMessageBox::critical( + nullptr, tr("Error"), + QString(tr("This game has no separate update to delete!"))); error = true; } else { folder_path = game_update_path; diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 7ae583040..9eccec8ea 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -1159,7 +1159,7 @@ separateUpdatesCheckBox - Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management. + Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID. From 87773a417b96417a14bab695422f70e80697f4e4 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 05:04:19 -0800 Subject: [PATCH 25/31] mac: Choose whether system Vulkan is needed at runtime. (#1780) --- CMakeLists.txt | 13 ++- REUSE.toml | 1 + externals/MoltenVK/CMakeLists.txt | 16 ++- externals/MoltenVK/MoltenVK_icd.json | 8 ++ src/video_core/renderer_vulkan/vk_common.h | 4 - .../renderer_vulkan/vk_platform.cpp | 105 +++++++++++------- 6 files changed, 95 insertions(+), 52 deletions(-) create mode 100644 externals/MoltenVK/MoltenVK_icd.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 78d8421a3..8f0397e86 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -892,11 +892,16 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() if (APPLE) - option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." 
OFF) - if (USE_SYSTEM_VULKAN_LOADER) - target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1) + if (ENABLE_QT_GUI) + # Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers. + target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json) + set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json + PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d) + add_custom_command(TARGET shadps4 POST_BUILD + COMMAND cmake -E copy $ $/Contents/Frameworks/libMoltenVK.dylib) + set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() - # Link MoltenVK for Vulkan support + # For non-bundled SDL build, just do a normal library link. target_link_libraries(shadps4 PRIVATE MoltenVK) endif() diff --git a/REUSE.toml b/REUSE.toml index 747679c8b..cba63adf1 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -15,6 +15,7 @@ path = [ "documents/changelog.md", "documents/Quickstart/2.png", "documents/Screenshots/*", + "externals/MoltenVK/MoltenVK_icd.json", "scripts/ps4_names.txt", "src/images/about_icon.png", "src/images/controller_icon.png", diff --git a/externals/MoltenVK/CMakeLists.txt b/externals/MoltenVK/CMakeLists.txt index 00e3231ee..908c2847c 100644 --- a/externals/MoltenVK/CMakeLists.txt +++ b/externals/MoltenVK/CMakeLists.txt @@ -1,17 +1,29 @@ # SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later -# Prepare version information +# Prepare MoltenVK Git revision find_package(Git) if(GIT_FOUND) execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD OUTPUT_VARIABLE MVK_GIT_REV + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) endif() -set(MVK_VERSION "1.2.12") set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated) file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = 
\"${MVK_GIT_REV}\";") +message(STATUS "MoltenVK revision: ${MVK_GIT_REV}") + +# Prepare MoltenVK version +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API) +string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}") +string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}") +string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}") +set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}") +message(STATUS "MoltenVK version: ${MVK_VERSION}") # Find required system libraries find_library(APPKIT_LIBRARY AppKit REQUIRED) diff --git a/externals/MoltenVK/MoltenVK_icd.json b/externals/MoltenVK/MoltenVK_icd.json new file mode 100644 index 000000000..2c3319263 --- /dev/null +++ b/externals/MoltenVK/MoltenVK_icd.json @@ -0,0 +1,8 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "../../../Frameworks/libMoltenVK.dylib", + "api_version": "1.2.0", + "is_portability_driver": true + } +} diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 9178aeb65..5fe199e0e 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -3,10 +3,6 @@ #pragma once -#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER -#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 -#endif - // Include vulkan-hpp header #define VK_ENABLE_BETA_EXTENSIONS #define VK_NO_PROTOTYPES diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index f5e513611..dbdabe0d9 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ 
b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -14,6 +14,7 @@ #endif #include +#include #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" @@ -21,15 +22,6 @@ #include "sdl_window.h" #include "video_core/renderer_vulkan/vk_platform.h" -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL -static vk::detail::DynamicLoader dl; -#else -extern "C" { -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, - const char* pName); -} -#endif - namespace Vulkan { static const char* const VALIDATION_LAYER_NAME = "VK_LAYER_KHRONOS_validation"; @@ -199,15 +191,57 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window return extensions; } +std::vector GetInstanceLayers(bool enable_validation, bool enable_crash_diagnostic) { + const auto [properties_result, properties] = vk::enumerateInstanceLayerProperties(); + if (properties_result != vk::Result::eSuccess || properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties: {}", + vk::to_string(properties_result)); + return {}; + } + + std::vector layers; + layers.reserve(2); + + if (enable_validation) { + layers.push_back(VALIDATION_LAYER_NAME); + } + if (enable_crash_diagnostic) { + layers.push_back(CRASH_DIAGNOSTIC_LAYER_NAME); + } + + // Sanitize layer list + std::erase_if(layers, [&](const char* layer) -> bool { + const auto it = std::ranges::find_if(properties, [layer](const auto& prop) { + return std::strcmp(layer, prop.layerName) == 0; + }); + if (it == properties.end()) { + LOG_ERROR(Render_Vulkan, "Requested layer {} is not available", layer); + return true; + } + return false; + }); + + return layers; +} + vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation, bool enable_crash_diagnostic) { LOG_INFO(Render_Vulkan, "Creating vulkan instance"); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - auto vkGetInstanceProcAddr = - dl.getProcAddress("vkGetInstanceProcAddr"); +#ifdef __APPLE__ + // If 
the Vulkan loader exists in /usr/local/lib, give it priority. The Vulkan SDK + // installs it here by default but it is not in the default library search path. + // The loader has a clause to check for it, but at a lower priority than the bundled + // libMoltenVK.dylib, so we need to handle it ourselves to give it priority. + static const std::string usr_local_path = "/usr/local/lib/libvulkan.dylib"; + static vk::detail::DynamicLoader dl = std::filesystem::exists(usr_local_path) + ? vk::detail::DynamicLoader(usr_local_path) + : vk::detail::DynamicLoader(); +#else + static vk::detail::DynamicLoader dl; #endif - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + VULKAN_HPP_DEFAULT_DISPATCHER.init( + dl.getProcAddress("vkGetInstanceProcAddr")); const auto [available_version_result, available_version] = VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion @@ -230,38 +264,25 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .apiVersion = available_version, }; - u32 num_layers = 0; - std::array layers; + const auto layers = GetInstanceLayers(enable_validation, enable_crash_diagnostic); - vk::Bool32 enable_force_barriers = vk::False; - const char* log_path{}; + const std::string extensions_string = fmt::format("{}", fmt::join(extensions, ", ")); + const std::string layers_string = fmt::format("{}", fmt::join(layers, ", ")); + LOG_INFO(Render_Vulkan, "Enabled instance extensions: {}", extensions_string); + LOG_INFO(Render_Vulkan, "Enabled instance layers: {}", layers_string); -#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL - if (enable_validation) { - layers[num_layers++] = VALIDATION_LAYER_NAME; - } + // Validation settings + vk::Bool32 enable_sync = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + vk::Bool32 enable_gpuav = Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = + Config::vkValidationGpuEnabled() ? 
"GPU_BASED_GPU_ASSISTED" : "GPU_BASED_NONE"; - if (enable_crash_diagnostic) { - layers[num_layers++] = CRASH_DIAGNOSTIC_LAYER_NAME; - static const auto crash_diagnostic_path = - Common::FS::GetUserPathString(Common::FS::PathType::LogDir); - log_path = crash_diagnostic_path.c_str(); - enable_force_barriers = vk::True; - } -#else - if (enable_validation || enable_crash_diagnostic) { - LOG_WARNING(Render_Vulkan, - "Skipping loading Vulkan layers as dynamic loading is not enabled."); - } -#endif + // Crash diagnostics settings + static const auto crash_diagnostic_path = + Common::FS::GetUserPathString(Common::FS::PathType::LogDir); + const char* log_path = crash_diagnostic_path.c_str(); + vk::Bool32 enable_force_barriers = vk::True; - vk::Bool32 enable_sync = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::Bool32 enable_gpuav = - enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() - ? "GPU_BASED_GPU_ASSISTED" - : "GPU_BASED_NONE"; const std::array layer_setings = { vk::LayerSettingEXT{ .pLayerName = VALIDATION_LAYER_NAME, @@ -331,7 +352,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e vk::StructureChain instance_ci_chain = { vk::InstanceCreateInfo{ .pApplicationInfo = &application_info, - .enabledLayerCount = num_layers, + .enabledLayerCount = static_cast(layers.size()), .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), From ccfb1bbfa8ab7e536d684fd2c80b90fcfe66b5f6 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 17 Dec 2024 21:56:08 -0800 Subject: [PATCH 26/31] vk_instance: Add additional fallback for missing D16UnormS8Uint. 
(#1810) --- .../renderer_vulkan/vk_instance.cpp | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index b479c1464..790e76400 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -583,23 +583,20 @@ bool Instance::IsFormatSupported(const vk::Format format, return (GetFormatFeatureFlags(format) & flags) == flags; } -static vk::Format GetAlternativeFormat(const vk::Format format) { - switch (format) { - case vk::Format::eD16UnormS8Uint: - return vk::Format::eD24UnormS8Uint; - default: - return format; - } -} - vk::Format Instance::GetSupportedFormat(const vk::Format format, const vk::FormatFeatureFlags2 flags) const { - if (IsFormatSupported(format, flags)) [[likely]] { - return format; - } - const vk::Format alternative = GetAlternativeFormat(format); - if (IsFormatSupported(alternative, flags)) [[likely]] { - return alternative; + if (!IsFormatSupported(format, flags)) [[unlikely]] { + switch (format) { + case vk::Format::eD16UnormS8Uint: + if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) { + return vk::Format::eD24UnormS8Uint; + } + if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) { + return vk::Format::eD32SfloatS8Uint; + } + default: + break; + } } return format; } From be4c38bf1c4ebd6bf669176cc5123cbf9103bd01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 18 Dec 2024 20:48:00 +0100 Subject: [PATCH 27/31] Handle 32bit int ImageFormat (#1823) --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5d2ec6f96..255a3e2b2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ 
b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -696,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { return spv::ImageFormat::R32ui; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) { + return spv::ImageFormat::R32i; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::R32f; From b1b4c8c48777a380e26da844e7a71cf3a94e4ce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 18 Dec 2024 20:57:58 +0100 Subject: [PATCH 28/31] Handle setting Vcc in Translator::SetDst64 (#1826) --- src/shader_recompiler/frontend/translate/translate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 3031e6643..a14bff706 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -439,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); return ir.SetVectorReg(IR::VectorReg(operand.code), lo); case OperandField::VccLo: - UNREACHABLE(); + ir.SetVccLo(lo); + return ir.SetVccHi(hi); case OperandField::VccHi: UNREACHABLE(); case OperandField::M0: From 32435674f24708b5d0533c3960b9447870ea35f4 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:05:35 -0600 Subject: [PATCH 29/31] Misc UE4 fixes (#1821) * Add ExecLo case to S_SAVEEXEC_B64 Seen in CUSA38209 * S_BCNT1_I32_B32 Turtle said our implementation of S_BCNT1_I32_B64 was meant to be for S_BCNT1_I32_B32, so renaming the opcode is the fix. 
--- src/shader_recompiler/frontend/translate/scalar_alu.cpp | 8 +++++--- src/shader_recompiler/frontend/translate/translate.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index e731e299a..f96fd0f40 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -98,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { break; case Opcode::S_BREV_B32: return S_BREV_B32(inst); - case Opcode::S_BCNT1_I32_B64: - return S_BCNT1_I32_B64(inst); + case Opcode::S_BCNT1_I32_B32: + return S_BCNT1_I32_B32(inst); case Opcode::S_FF1_I32_B32: return S_FF1_I32_B32(inst); case Opcode::S_AND_SAVEEXEC_B64: @@ -579,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { +void Translator::S_BCNT1_I32_B32(const GcnInst& inst) { const IR::U32 result = ir.BitCount(GetSrc(inst.src[0])); SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); @@ -602,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in return ir.GetVcc(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::ExecLo: + return ir.GetExec(); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8e575fcad..218b66d74 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -110,7 +110,7 @@ public: void S_MOV_B64(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); - void S_BCNT1_I32_B64(const GcnInst& inst); + void S_BCNT1_I32_B32(const GcnInst& inst); void 
S_FF1_I32_B32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); From 1e0809903680dd401641ce24d964b6e75d629482 Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:06:30 +0200 Subject: [PATCH 30/31] add R8Uint in image Detiling (#1812) used by InFamous, and maybe other games --- src/video_core/texture_cache/tile_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index a5e09e45d..94d37c993 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -172,6 +172,7 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { + case vk::Format::eR8Uint: case vk::Format::eR8Unorm: return vk::Format::eR8Uint; case vk::Format::eR4G4B4A4UnormPack16: From adf4b635f743ed2bc1d4d8d18ebacdd45649f7b4 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:11:09 +0200 Subject: [PATCH 31/31] hot-fix: Proper abi on init_routine --- src/core/libraries/kernel/threads/pthread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 08886c6eb..372f05bff 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -327,7 +327,7 @@ void PS4_SYSV_ABI sched_yield() { std::this_thread::yield(); } -int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) { +int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void PS4_SYSV_ABI (*init_routine)()) { for (;;) { auto state = once_control->state.load(); if (state == 
PthreadOnceState::Done) {