Merge branch 'main' into compat-gui

This commit is contained in:
georgemoralis 2024-12-19 10:39:44 +02:00 committed by GitHub
commit c5910d3a30
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
114 changed files with 3630 additions and 864 deletions

View File

@ -89,7 +89,7 @@ jobs:
arch: amd64
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@ -143,7 +143,7 @@ jobs:
arch: amd64
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@ -174,11 +174,6 @@ jobs:
with:
xcode-version: latest
- name: Install MoltenVK
run: |
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/bin/brew install molten-vk
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
@ -201,7 +196,7 @@ jobs:
variant: sccache
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@ -210,7 +205,7 @@ jobs:
run: |
mkdir upload
mv ${{github.workspace}}/build/shadps4 upload
cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload
cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload
tar cf shadps4-macos-sdl.tar.gz -C upload .
- uses: actions/upload-artifact@v4
with:
@ -230,11 +225,8 @@ jobs:
with:
xcode-version: latest
- name: Install MoltenVK and Setup Qt
run: |
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/bin/brew install molten-vk
- uses: jurplel/install-qt-action@v4
- name: Setup Qt
uses: jurplel/install-qt-action@v4
with:
version: 6.7.3
host: mac
@ -265,7 +257,7 @@ jobs:
variant: sccache
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@ -312,7 +304,7 @@ jobs:
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
@ -368,7 +360,7 @@ jobs:
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)

14
.gitmodules vendored
View File

@ -106,4 +106,16 @@
[submodule "externals/libpng"]
path = externals/libpng
url = https://github.com/pnggroup/libpng
shallow = true
shallow = true
[submodule "externals/MoltenVK/SPIRV-Cross"]
path = externals/MoltenVK/SPIRV-Cross
url = https://github.com/KhronosGroup/SPIRV-Cross
shallow = true
[submodule "externals/MoltenVK/MoltenVK"]
path = externals/MoltenVK/MoltenVK
url = https://github.com/KhronosGroup/MoltenVK
shallow = true
[submodule "externals/MoltenVK/cereal"]
path = externals/MoltenVK/cereal
url = https://github.com/USCiLab/cereal
shallow = true

View File

@ -664,12 +664,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp
@ -683,6 +685,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/opcodes.cpp
src/shader_recompiler/ir/opcodes.h
src/shader_recompiler/ir/opcodes.inc
src/shader_recompiler/ir/patch.cpp
src/shader_recompiler/ir/patch.h
src/shader_recompiler/ir/post_order.cpp
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp
@ -877,7 +881,7 @@ endif()
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
@ -891,13 +895,17 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
endif()
if (APPLE)
option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF)
if (USE_SYSTEM_VULKAN_LOADER)
target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1)
if (ENABLE_QT_GUI)
# Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers.
target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json)
set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json
PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d)
add_custom_command(TARGET shadps4 POST_BUILD
COMMAND cmake -E copy $<TARGET_LINKER_FILE:MoltenVK> $<TARGET_BUNDLE_DIR:shadps4>/Contents/Frameworks/libMoltenVK.dylib)
set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks")
else()
# Link MoltenVK for Vulkan support
find_library(MOLTENVK MoltenVK REQUIRED)
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
# For non-bundled SDL build, just do a normal library link.
target_link_libraries(shadps4 PRIVATE MoltenVK)
endif()
if (ARCHITECTURE STREQUAL "x86_64")
@ -1018,4 +1026,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo")
install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png")
install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps")
endif()
endif()

View File

@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o
# Keyboard mapping
| Button | Function |
|-------------|-------------|
F10 | FPS Counter
Ctrl+F10 | Video Debug Info
F11 | Fullscreen
F12 | Trigger RenderDoc Capture
> [!NOTE]
> Xbox and DualShock controllers work out of the box.

View File

@ -15,6 +15,7 @@ path = [
"documents/changelog.md",
"documents/Quickstart/2.png",
"documents/Screenshots/*",
"externals/MoltenVK/MoltenVK_icd.json",
"scripts/ps4_names.txt",
"src/images/about_icon.png",
"src/images/controller_icon.png",

View File

@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv)
brew install clang-format cmake
```
Next, install x86_64 Homebrew and libraries.
Next, install x86_64 Qt. You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI.
**If you are on an ARM Mac:**
```
# Installs x86_64 Homebrew to /usr/local
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# Installs libraries.
arch -x86_64 /usr/local/bin/brew install molten-vk qt@6
arch -x86_64 /usr/local/bin/brew install qt@6
```
**If you are on an x86_64 Mac:**
```
brew install molten-vk qt@6
brew install qt@6
```
If you don't need the Qt GUI you can remove `qt@6` from the last command.
### Cloning and compiling:
Clone the repository recursively:

View File

@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES
SYSTEM ON
)
# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
if (MSVC)
# Silence "deprecation" warnings
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
@ -107,7 +110,7 @@ if (NOT TARGET glslang::glslang)
set(ENABLE_OPT OFF CACHE BOOL "")
add_subdirectory(glslang)
file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h")
target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
endif()
# Robin-map
@ -174,15 +177,6 @@ if (NOT TARGET PNG::PNG)
add_library(PNG::PNG ALIAS png_static)
endif()
if (APPLE)
# date
if (NOT TARGET date::date-tz)
option(BUILD_TZ_LIB "" ON)
option(USE_SYSTEM_TZ_DB "" ON)
add_subdirectory(date)
endif()
endif()
# Dear ImGui
add_library(Dear_ImGui
dear_imgui/imgui.cpp
@ -199,7 +193,7 @@ option(TRACY_ENABLE "" ON)
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treated as a crash
option(TRACY_ON_DEMAND "" ON)
option(TRACY_NO_FRAME_IMAGE "" ON)
option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling
option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability
option(TRACY_NO_SYSTEM_TRACING "" ON)
option(TRACY_NO_CALLSTACK "" ON)
option(TRACY_NO_CODE_TRANSFER "" ON)
@ -229,3 +223,18 @@ if (NOT TARGET stb::headers)
target_include_directories(stb INTERFACE stb)
add_library(stb::headers ALIAS stb)
endif()
# Apple-only dependencies
if (APPLE)
# date
if (NOT TARGET date::date-tz)
option(BUILD_TZ_LIB "" ON)
option(USE_SYSTEM_TZ_DB "" ON)
add_subdirectory(date)
endif()
# MoltenVK
if (NOT TARGET MoltenVK)
add_subdirectory(MoltenVK)
endif()
endif()

93
externals/MoltenVK/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,93 @@
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
# Prepare MoltenVK Git revision
find_package(Git)
if(GIT_FOUND)
execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
OUTPUT_VARIABLE MVK_GIT_REV
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated)
file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";")
message(STATUS "MoltenVK revision: ${MVK_GIT_REV}")
# Prepare MoltenVK version
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API)
string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}")
string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}")
string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}")
string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}")
string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}")
string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}")
set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}")
message(STATUS "MoltenVK version: ${MVK_VERSION}")
# Find required system libraries
find_library(APPKIT_LIBRARY AppKit REQUIRED)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(IOKIT_LIBRARY IOKit REQUIRED)
find_library(IOSURFACE_LIBRARY IOSurface REQUIRED)
find_library(METAL_LIBRARY Metal REQUIRED)
find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED)
# cereal
# Build switches set before the cereal subproject is added. Judging by their names they
# disable cereal's portability tests, docs, sandbox and performance comparison targets
# (NOTE(review): confirm against cereal's own CMakeLists).
# SPIRV_CROSS_SKIP_INSTALL is not listed here: it is a SPIRV-Cross switch and is already
# set in the SPIRV-Cross section right before that subproject is added.
option(SKIP_PORTABILITY_TEST "" ON)
option(BUILD_DOC "" OFF)
option(BUILD_SANDBOX "" OFF)
option(SKIP_PERFORMANCE_COMPARISON "" ON)
add_subdirectory(cereal)
# SPIRV-Cross
option(SPIRV_CROSS_CLI "" OFF)
option(SPIRV_CROSS_ENABLE_TESTS "" OFF)
option(SPIRV_CROSS_ENABLE_HLSL "" OFF)
option(SPIRV_CROSS_ENABLE_CPP "" OFF)
option(SPIRV_CROSS_SKIP_INSTALL "" ON)
add_subdirectory(SPIRV-Cross)
# Common
set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common)
file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS
${MVK_COMMON_DIR}/*.cpp
${MVK_COMMON_DIR}/*.m
${MVK_COMMON_DIR}/*.mm)
set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR})
add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES})
target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES})
target_compile_options(MoltenVKCommon PRIVATE -w)
# MoltenVKShaderConverter
set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter)
file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm)
set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include)
add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES})
target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES})
target_compile_options(MoltenVKShaderConverter PRIVATE -w)
target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon)
target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1)
# MoltenVK
set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK)
file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS
${MVK_DIR}/MoltenVK/*.cpp
${MVK_DIR}/MoltenVK/*.m
${MVK_DIR}/MoltenVK/*.mm)
file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*)
set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include)
add_library(MoltenVK SHARED ${MVK_SOURCES})
target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES})
target_compile_options(MoltenVK PRIVATE -w)
target_link_libraries(MoltenVK PRIVATE
${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY}
Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter)
target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1)

1
externals/MoltenVK/MoltenVK vendored Submodule

@ -0,0 +1 @@
Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932

8
externals/MoltenVK/MoltenVK_icd.json vendored Normal file
View File

@ -0,0 +1,8 @@
{
"file_format_version": "1.0.0",
"ICD": {
"library_path": "../../../Frameworks/libMoltenVK.dylib",
"api_version": "1.2.0",
"is_portability_driver": true
}
}

1
externals/MoltenVK/SPIRV-Cross vendored Submodule

@ -0,0 +1 @@
Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6

1
externals/MoltenVK/cereal vendored Submodule

@ -0,0 +1 @@
Subproject commit d1fcec807b372f04e4c1041b3058e11c12853e6e

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35

View File

@ -422,6 +422,10 @@ void setEmulatorLanguage(std::string language) {
emulator_language = language;
}
// Overwrites `settings_install_dirs` with a copy of the supplied directory list.
// `save()` iterates `settings_install_dirs` when it builds the install-dir strings it writes
// out, so the new list is what gets persisted on the next save.
void setGameInstallDirs(const std::vector<std::filesystem::path>& settings_install_dirs_config) {
settings_install_dirs = settings_install_dirs_config;
}
u32 getMainWindowGeometryX() {
return main_window_geometry_x;
}
@ -673,14 +677,6 @@ void save(const std::filesystem::path& path) {
data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic;
data["Debug"]["DebugDump"] = isDebugDump;
data["Debug"]["CollectShader"] = isShaderDebug;
data["GUI"]["theme"] = mw_themes;
data["GUI"]["iconSize"] = m_icon_size;
data["GUI"]["sliderPos"] = m_slider_pos;
data["GUI"]["iconSizeGrid"] = m_icon_size_grid;
data["GUI"]["sliderPosGrid"] = m_slider_pos_grid;
data["GUI"]["gameTableMode"] = m_table_mode;
data["GUI"]["mw_width"] = m_window_size_W;
data["GUI"]["mw_height"] = m_window_size_H;
std::vector<std::string> install_dirs;
for (const auto& dirString : settings_install_dirs) {
@ -690,6 +686,44 @@ void save(const std::filesystem::path& path) {
data["GUI"]["addonInstallDir"] =
std::string{fmt::UTF(settings_addon_install_dir.u8string()).data};
data["GUI"]["emulatorLanguage"] = emulator_language;
data["Settings"]["consoleLanguage"] = m_language;
std::ofstream file(path, std::ios::binary);
file << data;
file.close();
saveMainWindow(path);
}
void saveMainWindow(const std::filesystem::path& path) {
toml::value data;
std::error_code error;
if (std::filesystem::exists(path, error)) {
try {
std::ifstream ifs;
ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
ifs.open(path, std::ios_base::binary);
data = toml::parse(ifs, std::string{fmt::UTF(path.filename().u8string()).data});
} catch (const std::exception& ex) {
fmt::print("Exception trying to parse config file. Exception: {}\n", ex.what());
return;
}
} else {
if (error) {
fmt::print("Filesystem error: {}\n", error.message());
}
fmt::print("Saving new configuration file {}\n", fmt::UTF(path.u8string()));
}
data["GUI"]["mw_width"] = m_window_size_W;
data["GUI"]["mw_height"] = m_window_size_H;
data["GUI"]["theme"] = mw_themes;
data["GUI"]["iconSize"] = m_icon_size;
data["GUI"]["sliderPos"] = m_slider_pos;
data["GUI"]["iconSizeGrid"] = m_icon_size_grid;
data["GUI"]["sliderPosGrid"] = m_slider_pos_grid;
data["GUI"]["gameTableMode"] = m_table_mode;
data["GUI"]["geometry_x"] = main_window_geometry_x;
data["GUI"]["geometry_y"] = main_window_geometry_y;
data["GUI"]["geometry_w"] = main_window_geometry_w;
@ -697,9 +731,6 @@ void save(const std::filesystem::path& path) {
data["GUI"]["pkgDirs"] = m_pkg_viewer;
data["GUI"]["elfDirs"] = m_elf_viewer;
data["GUI"]["recentFiles"] = m_recent_files;
data["GUI"]["emulatorLanguage"] = emulator_language;
data["Settings"]["consoleLanguage"] = m_language;
std::ofstream file(path, std::ios::binary);
file << data;

View File

@ -13,6 +13,7 @@ enum HideCursorState : s16 { Never, Idle, Always };
void load(const std::filesystem::path& path);
void save(const std::filesystem::path& path);
void saveMainWindow(const std::filesystem::path& path);
bool isNeoMode();
bool isFullscreenMode();
@ -67,6 +68,7 @@ void setNeoMode(bool enable);
void setUserName(const std::string& type);
void setUpdateChannel(const std::string& type);
void setSeparateUpdateEnabled(bool use);
void setGameInstallDirs(const std::vector<std::filesystem::path>& settings_install_dirs_config);
void setCursorState(s16 cursorState);
void setCursorHideTimeout(int newcursorHideTimeout);
@ -128,4 +130,4 @@ void setDefaultValues();
// settings
u32 GetLanguage();
}; // namespace Config
}; // namespace Config

View File

@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() {
return tracy::GetProfiler().IsConnected();
}
#define TRACY_GPU_ENABLED 0
#define CUSTOM_LOCK(type, varname) \
tracy::LockableCtx varname { \
[]() -> const tracy::SourceLocationData* { \
@ -57,3 +59,11 @@ enum MarkersPalette : int {
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
#define FRAME_END FrameMark
#ifdef TRACY_FIBERS
#define FIBER_ENTER(name) TracyFiberEnter(name)
#define FIBER_EXIT TracyFiberLeave
#else
#define FIBER_ENTER(name)
#define FIBER_EXIT
#endif

View File

@ -142,45 +142,66 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
frame.queues.push_back(std::move(dump));
}
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
std::scoped_lock lock{frame_dump_list_mutex};
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
const auto it = waiting_reg_dumps.find(header_addr);
if (it == waiting_reg_dumps.end()) {
return;
return std::nullopt;
}
auto& frame = *it->second;
waiting_reg_dumps.erase(it);
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
auto& dump = frame.regs[header_addr - base_addr];
dump.regs = regs;
if (is_compute) {
dump.is_compute = true;
const auto& cs = dump.regs.cs_program;
dump.cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
} else {
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if (regs.stage_enable.IsStageEnabled(i)) {
auto stage = regs.ProgramForStage(i);
if (stage->address_lo != 0) {
auto code = stage->Code();
dump.stages[i] = PipelineShaderProgramDump{
.user_data = *stage,
.code = std::vector<u32>{code.begin(), code.end()},
};
}
return &frame.regs[header_addr - base_addr];
}
// Fills the register dump that is waiting on `header_addr` with a graphics register snapshot.
//
// Takes `frame_dump_list_mutex`, asks GetRegDump() for the pending dump and returns without
// doing anything when there is none. Otherwise `regs` is copied into the dump and, for every
// enabled shader stage whose program has a non-zero `address_lo`, the stage's user data and a
// copy of its code words are stored in `stages[i]`.
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
                                  const AmdGpu::Liverpool::Regs& regs) {
    std::scoped_lock lock{frame_dump_list_mutex};

    const auto pending = GetRegDump(base_addr, header_addr);
    if (!pending.has_value()) {
        return;
    }

    RegDump& target = **pending;
    target.regs = regs;

    for (int stage_idx = 0; stage_idx < RegDump::MaxShaderStages; ++stage_idx) {
        // Stage state is read back from the copy stored in the dump, not from `regs`.
        if (!target.regs.stage_enable.IsStageEnabled(stage_idx)) {
            continue;
        }
        const auto program = target.regs.ProgramForStage(stage_idx);
        if (program->address_lo == 0) {
            // No shader bound for this stage; leave the slot untouched.
            continue;
        }
        const auto binary = program->Code();
        target.stages[stage_idx] = PipelineShaderProgramDump{
            .user_data = *program,
            .code = std::vector<u32>{binary.begin(), binary.end()},
        };
    }
}
void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module,
std::span<const u32> spv, std::span<const u32> raw_code,
std::span<const u32> patch_spv, bool is_patched) {
shader_dump_list.emplace_back(name, module, std::vector<u32>{spv.begin(), spv.end()},
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
const CsState& cs_state) {
std::scoped_lock lock{frame_dump_list_mutex};
auto dump = GetRegDump(base_addr, header_addr);
if (!dump) {
return;
}
(*dump)->is_compute = true;
auto& cs = (*dump)->regs.cs_program;
cs = cs_state;
(*dump)->cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
}
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv,
bool is_patched) {
shader_dump_list.emplace_back(name, l_stage, module, std::vector<u32>{spv.begin(), spv.end()},
std::vector<u32>{raw_code.begin(), raw_code.end()},
std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
}

View File

@ -11,7 +11,6 @@
#include <queue>
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#ifdef _WIN32
@ -76,6 +75,7 @@ struct FrameDump {
struct ShaderDump {
std::string name;
Shader::LogicalStage l_stage;
vk::ShaderModule module;
std::vector<u32> spv;
@ -90,16 +90,17 @@ struct ShaderDump {
std::string cache_isa_disasm{};
std::string cache_patch_disasm{};
ShaderDump(std::string name, vk::ShaderModule module, std::vector<u32> spv,
std::vector<u32> isa, std::vector<u32> patch_spv, bool is_patched)
: name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)),
patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module,
std::vector<u32> spv, std::vector<u32> isa, std::vector<u32> patch_spv,
bool is_patched)
: name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)),
isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
ShaderDump(const ShaderDump& other) = delete;
ShaderDump(ShaderDump&& other) noexcept
: name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)},
isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)},
patch_source{std::move(other.patch_source)},
: name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)},
spv{std::move(other.spv)}, isa{std::move(other.isa)},
patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)},
cache_spv_disasm{std::move(other.cache_spv_disasm)},
cache_isa_disasm{std::move(other.cache_isa_disasm)},
cache_patch_disasm{std::move(other.cache_patch_disasm)} {}
@ -108,6 +109,7 @@ struct ShaderDump {
if (this == &other)
return *this;
name = std::move(other.name);
l_stage = other.l_stage;
module = std::move(other.module);
spv = std::move(other.spv);
isa = std::move(other.isa);
@ -201,11 +203,17 @@ public:
void PushQueueDump(QueueDump dump);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
const AmdGpu::Liverpool::Regs& regs);
using CsState = AmdGpu::Liverpool::ComputeProgram;
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
void CollectShader(const std::string& name, vk::ShaderModule module, std::span<const u32> spv,
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv,
bool is_patched);
private:
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
};
} // namespace DebugStateType

View File

@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) {
DebugState.ShowDebugMessage(msg);
}
if (compile) {
static std::map<std::string, std::string> stage_arg = {
{"vs", "vert"},
{"gs", "geom"},
{"fs", "frag"},
{"cs", "comp"},
static std::map<Shader::LogicalStage, std::string> stage_arg = {
{Shader::LogicalStage::Vertex, "vert"},
{Shader::LogicalStage::TessellationControl, "tesc"},
{Shader::LogicalStage::TessellationEval, "tese"},
{Shader::LogicalStage::Geometry, "geom"},
{Shader::LogicalStage::Fragment, "frag"},
{Shader::LogicalStage::Compute, "comp"},
};
auto stage = stage_arg.find(value.name.substr(0, 2));
auto stage = stage_arg.find(value.l_stage);
if (stage == stage_arg.end()) {
DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} +
value.name.substr(0, 2));
DebugState.ShowDebugMessage(std::string{"Invalid shader stage"});
} else {
std::string cmd =
fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 "

View File

@ -10,16 +10,28 @@
namespace Core::FileSys {
// Returns a copy of `path` with every trailing '/' removed, to make mount comparisons simpler.
//
// A path that is empty or consists only of slashes yields an empty string; a path without a
// trailing slash is returned unchanged. Slashes in the middle of the path are kept.
std::string RemoveTrailingSlashes(const std::string& path) {
    // Position of the last character that must be kept. npos means there is no such character
    // (empty input or nothing but slashes).
    const auto last_kept = path.find_last_not_of('/');
    if (last_kept == std::string::npos) {
        return {};
    }
    // Build the result at its final length instead of copying everything and popping
    // one character at a time.
    return path.substr(0, last_kept + 1);
}
void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder,
bool read_only) {
std::scoped_lock lock{m_mutex};
m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only);
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only);
}
void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) {
std::scoped_lock lock{m_mutex};
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(),
[&](const MntPair& pair) { return pair.mount == guest_folder; });
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) {
return pair.mount == guest_folder_sanitized;
});
m_mnt_pairs.erase(it, m_mnt_pairs.end());
}
@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
}
// Nothing to do if getting the mount itself.
if (corrected_path == mount->mount) {
const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path);
if (corrected_path_sanitized == mount->mount) {
return mount->host_path;
}
@ -186,4 +199,14 @@ void HandleTable::CreateStdHandles() {
setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr
}
// Returns the index of `file` inside `m_files`, searching under `m_mutex`.
//
// NOTE(review): when `file` is not present the function returns 0, which is the same value
// produced for a match at index 0, so callers cannot tell the two apart — confirm that this is
// what callers expect.
int HandleTable::GetFileDescriptor(File* file) {
    std::scoped_lock lock{m_mutex};

    const auto first = m_files.begin();
    const auto last = m_files.end();
    const auto match = std::find(first, last, file);
    if (match == last) {
        return 0;
    }
    return std::distance(first, match);
}
} // namespace Core::FileSys

View File

@ -9,6 +9,7 @@
#include <vector>
#include <tsl/robin_map.h>
#include "common/io_file.h"
#include "common/logging/formatter.h"
#include "core/devices/base_device.h"
namespace Core::FileSys {
@ -22,7 +23,7 @@ class MntPoints {
public:
struct MntPair {
std::filesystem::path host_path;
std::string mount; // e.g /app0/
std::string mount; // e.g /app0
bool read_only;
};
@ -37,10 +38,21 @@ public:
std::filesystem::path GetHostPath(std::string_view guest_directory,
bool* is_read_only = nullptr);
// Finds the first mount whose host path is a textual prefix of `host_path`.
//
// The search runs under `m_mutex`. Returns a pointer to the matching entry of `m_mnt_pairs`,
// or nullptr when no mount matches.
// NOTE(review): the match is a plain string prefix test, so a mount at ".../app" would also
// match a host path under ".../app2" — confirm whether a separator-aware check is wanted here.
const MntPair* GetMountFromHostPath(const std::string& host_path) {
    std::scoped_lock lock{m_mutex};
    for (const MntPair& candidate : m_mnt_pairs) {
        const std::string candidate_host{fmt::UTF(candidate.host_path.u8string()).data};
        if (host_path.starts_with(candidate_host)) {
            return &candidate;
        }
    }
    return nullptr;
}
const MntPair* GetMount(const std::string& guest_path) {
std::scoped_lock lock{m_mutex};
const auto it = std::ranges::find_if(
m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); });
const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) {
// When doing starts-with check, add a trailing slash to make sure we don't match
// against only part of the mount path.
return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/");
});
return it == m_mnt_pairs.end() ? nullptr : &*it;
}
@ -83,6 +95,7 @@ public:
void DeleteHandle(int d);
File* GetFile(int d);
File* GetFile(const std::filesystem::path& host_name);
int GetFileDescriptor(File* file);
void CreateStdHandles();

View File

@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI
int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel,
u32* pQueueAvailable) {
LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId);
LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId);
return ORBIS_OK;
}

View File

@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
// In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{};
std::condition_variable cv_lock{};
static std::mutex m_submission{};
std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
static int sdk_version{0};
struct AscQueueInfo {
VAddr map_addr;
u32* read_addr;
u32 ring_size_dw;
};
static Common::SlotVector<AscQueueInfo> asc_queues{};
static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;
@ -493,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
}
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
HLE_TRACE;
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
if (gnm_vqid == 0) {
@ -506,11 +502,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
}
auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}];
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
const std::span acb_span{acb_ptr, acb_size >> 2u};
auto& asc_queue = liverpool->asc_queues[{vqid}];
const auto& offs_dw = asc_next_offs_dw[vqid];
if (next_offs_dw < offs_dw) {
ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
}
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
const std::span acb_span{acb_ptr, acb_size_dw};
asc_next_offs_dw[vqid] = next_offs_dw;
if (DebugState.DumpingCurrentFrame()) {
static auto last_frame_num = -1LL;
@ -545,9 +549,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
});
}
liverpool->SubmitAsc(gnm_vqid, acb_span);
*asc_queue.read_addr += acb_size;
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
}
void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
@ -971,7 +972,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
}
void PS4_SYSV_ABI sceGnmFlushGarlic() {
LOG_WARNING(Lib_GnmDriver, "(STUBBED) called");
LOG_TRACE(Lib_GnmDriver, "(STUBBED) called");
}
int PS4_SYSV_ABI sceGnmGetCoredumpAddress() {
@ -1266,12 +1267,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
}
auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
const auto vqid =
liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
const auto gnm_vqid = vqid.index + 1;
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
gnm_vqid);
const auto& queue = liverpool->asc_queues[vqid];
*queue.read_addr = 0u;
return gnm_vqid;
}
@ -1642,7 +1647,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
LOG_TRACE(Lib_GnmDriver, "called");
if (!cmdbuf || size < 0x1E) {
return -1;
}
@ -1660,11 +1664,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL
hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
// right padding?
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK;
}
@ -2161,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
u32* dcb_sizes_in_bytes,
const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes) {
HLE_TRACE;
LOG_DEBUG(Lib_GnmDriver, "called");
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
@ -2253,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
}
int PS4_SYSV_ABI sceGnmSubmitDone() {
HLE_TRACE;
LOG_DEBUG(Lib_GnmDriver, "called");
WaitGpuIdle();
if (!liverpool->IsGpuIdle()) {

View File

@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
return sizeof(OrbisKernelDirent);
}
// Continues a directory listing across a separately installed update.
//
// If the host path behind `fd` contains "-UPDATE", the same path with that
// marker removed is opened (or reused if a handle for it already exists) and
// the next directory entry is read from it via GetDents.
//
// Returns the result of GetDents on that directory, or 0 when there is nothing
// to continue with: `fd` is unknown, the path has no "-UPDATE" marker, no mount
// covers the host path, or the counterpart directory does not exist.
static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) {
    const std::string update_marker = "-UPDATE";
    int dir_entries = 0;

    auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
    auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();

    auto* file = h->GetFile(fd);
    if (file == nullptr) {
        return dir_entries;
    }
    auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data};

    const size_t pos = update_dir_name.find(update_marker);
    if (pos == std::string::npos) {
        return dir_entries;
    }

    // The mount is looked up with the unmodified host name (marker still present).
    // GetMountFromHostPath returns nullptr when nothing matches, so check before use.
    const auto* mount = mnt->GetMountFromHostPath(update_dir_name);
    if (mount == nullptr) {
        return dir_entries;
    }
    const auto mount_host_prefix = std::string{fmt::UTF(mount->host_path.u8string()).data};

    update_dir_name.erase(pos, update_marker.size());

    // Skip the mount's host path plus one separator to get the mount-relative part.
    // Guard the offset so substr cannot throw on an unexpectedly short path.
    const size_t relative_start = mount_host_prefix.size() + 1;
    if (relative_start > update_dir_name.size()) {
        return dir_entries;
    }
    const auto guest_name = mount->mount + "/" + update_dir_name.substr(relative_start);

    int descriptor;
    auto existent_folder = h->GetFile(update_dir_name);
    if (!existent_folder) {
        u32 handle = h->CreateHandle();
        auto* new_file = h->GetFile(handle);
        new_file->type = Core::FileSys::FileType::Directory;
        new_file->m_guest_name = guest_name;
        new_file->m_host_name = update_dir_name;
        if (!std::filesystem::is_directory(new_file->m_host_name)) {
            // No counterpart directory: release the handle that was just created.
            h->DeleteHandle(handle);
            return dir_entries;
        }
        new_file->dirents = GetDirectoryEntries(new_file->m_host_name);
        new_file->dirents_index = 0;
        new_file->is_opened = true;
        descriptor = h->GetFileDescriptor(new_file);
    } else {
        descriptor = h->GetFileDescriptor(existent_folder);
    }

    dir_entries = GetDents(descriptor, buf, nbytes, basep);
    if (dir_entries == ORBIS_OK && existent_folder) {
        // Both listings are exhausted: rewind them so a later enumeration starts over.
        existent_folder->dirents_index = 0;
        file->dirents_index = 0;
    }
    return dir_entries;
}
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
return GetDents(fd, buf, nbytes, nullptr);
int a = GetDents(fd, buf, nbytes, nullptr);
if (a == ORBIS_OK) {
return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr);
}
return a;
}
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
return GetDents(fd, buf, nbytes, basep);
int a = GetDents(fd, buf, nbytes, basep);
if (a == ORBIS_OK) {
return HandleSeparateUpdateDents(fd, buf, nbytes, basep);
}
return a;
}
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {

View File

@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
return handle;
}
handle = linker->LoadModule(path, true);
if (handle == -1) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
auto* module = linker->GetModule(handle);
linker->RelocateAnyImports(module);

View File

@ -327,7 +327,8 @@ void PS4_SYSV_ABI sched_yield() {
std::this_thread::yield();
}
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) {
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control,
void PS4_SYSV_ABI (*init_routine)()) {
for (;;) {
auto state = once_control->state.load();
if (state == PthreadOnceState::Done) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cstring>
#include "common/assert.h"
#include "common/logging/log.h"
@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou
return std::strncpy(dest, src, count);
}
// Bounds-checked string copy exposed to guest code as libc's strncpy_s.
//
// Copies at most `count` characters from `src` into `dest` (capacity `destsz`)
// and always null-terminates on success.
//
// Returns 0 on success. On non-Windows hosts returns EINVAL when `dest` is null,
// `destsz` is 0 or `src` is null, and ERANGE when the text to copy plus the
// terminator does not fit; on failure `dest` is set to an empty string whenever
// it is usable. On Windows the host CRT's strncpy_s is used directly.
int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) {
#ifdef _WIN64
    return strncpy_s(dest, destsz, src, count);
#else
    if (dest == nullptr || destsz == 0) {
        return EINVAL;
    }
    if (src == nullptr) {
        dest[0] = '\0';
        return EINVAL;
    }

    // Number of characters to copy: stop at the terminator, at `count`, or once it is
    // known that the destination is too small. Never reads past what is needed.
    size_t len = 0;
    while (len < count && len < destsz && src[len] != '\0') {
        ++len;
    }
    if (len >= destsz) {
        // No room left for the terminator.
        dest[0] = '\0';
        return ERANGE;
    }

    std::memcpy(dest, src, len);
    dest[len] = '\0';
    return 0;
#endif
}
char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) {
return std::strcat(dest, src);
}
@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) {
internal_strlen);
LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strncpy);
LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strncpy_s);
LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strcat);
LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,

View File

@ -38,21 +38,22 @@ void TrophyUI::Finish() {
void TrophyUI::Draw() {
const auto& io = GetIO();
float AdjustWidth = io.DisplaySize.x / 1280;
float AdjustHeight = io.DisplaySize.y / 720;
const ImVec2 window_size{
std::min(io.DisplaySize.x, 250.f),
std::min(io.DisplaySize.y, 70.f),
std::min(io.DisplaySize.x, (300 * AdjustWidth)),
std::min(io.DisplaySize.y, (70 * AdjustHeight)),
};
SetNextWindowSize(window_size);
SetNextWindowCollapsed(false);
SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50));
SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight)));
KeepNavHighlight();
if (Begin("Trophy Window", nullptr,
ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings |
ImGuiWindowFlags_NoInputs)) {
if (trophy_icon) {
Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50));
Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight)));
ImGui::SameLine();
} else {
// placeholder
@ -61,6 +62,7 @@ void TrophyUI::Draw() {
GetColorU32(ImVec4{0.7f}));
ImGui::Indent(60);
}
SetWindowFontScale((1.2 * AdjustHeight));
TextWrapped("Trophy earned!\n%s", trophy_name.c_str());
}
End();

View File

@ -155,6 +155,9 @@ int PS4_SYSV_ABI scePadGetFeatureReport() {
}
int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) {
if (userId == -1) {
return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE;
}
LOG_DEBUG(Lib_Pad, "(DUMMY) called");
return 1;
}
@ -246,6 +249,9 @@ int PS4_SYSV_ABI scePadMbusTerm() {
int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) {
LOG_INFO(Lib_Pad, "(DUMMY) called user_id = {} type = {} index = {}", userId, type, index);
if (userId == -1) {
return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE;
}
if (Config::getUseSpecialPad()) {
if (type != ORBIS_PAD_PORT_TYPE_SPECIAL)
return ORBIS_PAD_ERROR_DEVICE_NOT_CONNECTED;
@ -346,6 +352,9 @@ int PS4_SYSV_ABI scePadReadHistory() {
}
int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) {
if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) {
return ORBIS_PAD_ERROR_INVALID_HANDLE;
}
auto* controller = Common::Singleton<Input::GameController>::Instance();
int connectedCount = 0;
bool isConnected = false;

View File

@ -137,8 +137,8 @@ s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle,
s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds,
uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) {
LOG_INFO(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
*chunkIds, numberOfEntries);
LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
*chunkIds, numberOfEntries);
if (handle != PlaygoHandle) {
return ORBIS_PLAYGO_ERROR_BAD_HANDLE;

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <set>
#include <fmt/core.h>
#include "common/config.h"
@ -100,15 +101,17 @@ Emulator::Emulator() {
Emulator::~Emulator() {
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::save(config_dir / "config.toml");
Config::saveMainWindow(config_dir / "config.toml");
}
void Emulator::Run(const std::filesystem::path& file) {
// Use the eboot from the separated updates folder if it's there
std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE");
bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys");
std::filesystem::path eboot_path = use_game_patch ? game_patch_folder / file.filename() : file;
std::filesystem::path game_patch_folder = file.parent_path();
game_patch_folder += "-UPDATE";
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
? game_patch_folder / file.filename()
: file;
// Applications expect to be run from /app0 so mount the file's parent path as app0.
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
@ -226,20 +229,37 @@ void Emulator::Run(const std::filesystem::path& file) {
LoadSystemModules(eboot_path, game_info.game_serial);
// Load all prx from game's sce_module folder
std::filesystem::path sce_module_folder = file.parent_path() / "sce_module";
if (std::filesystem::is_directory(sce_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) {
std::filesystem::path module_path = entry.path();
std::filesystem::path update_module_path =
eboot_path.parent_path() / "sce_module" / entry.path().filename();
if (std::filesystem::exists(update_module_path) && use_game_patch) {
module_path = update_module_path;
std::vector<std::filesystem::path> modules_to_load;
std::filesystem::path game_module_folder = file.parent_path() / "sce_module";
if (std::filesystem::is_directory(game_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) {
if (entry.is_regular_file()) {
modules_to_load.push_back(entry.path());
}
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
linker->LoadModule(module_path);
}
}
// Load all prx from separate update's sce_module folder
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
if (std::filesystem::is_directory(update_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(),
[&entry](const std::filesystem::path& p) {
return p.filename() == entry.path().filename();
});
if (it != modules_to_load.end()) {
*it = entry.path();
} else {
modules_to_load.push_back(entry.path());
}
}
}
for (const auto& module_path : modules_to_load) {
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
linker->LoadModule(module_path);
}
#ifdef ENABLE_DISCORD_RPC
// Discord RPC
if (Config::getEnableDiscordRPC()) {
@ -266,7 +286,7 @@ void Emulator::Run(const std::filesystem::path& file) {
}
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
constexpr std::array<SysModules, 10> ModulesToLoad{
constexpr std::array<SysModules, 13> ModulesToLoad{
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
{"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber},
{"libSceUlt.sprx", nullptr},
@ -276,7 +296,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
{"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc},
{"libSceCesCs.sprx", nullptr}}};
{"libSceCesCs.sprx", nullptr},
{"libSceFont.sprx", nullptr},
{"libSceFontFt.sprx", nullptr},
{"libSceFreeTypeOt.sprx", nullptr}}};
std::vector<std::filesystem::path> found_modules;
const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir);

View File

@ -139,7 +139,7 @@ void GameListFrame::PopulateGameList() {
formattedPlayTime = formattedPlayTime.trimmed();
m_game_info->m_games[i].play_time = playTime.toStdString();
if (formattedPlayTime.isEmpty()) {
SetTableItem(i, 8, "0");
SetTableItem(i, 8, QString("%1s").arg(seconds));
} else {
SetTableItem(i, 8, formattedPlayTime);
}

View File

@ -122,11 +122,11 @@ public:
if (selected == &openSfoViewer) {
PSF psf;
QString game_update_path;
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
std::filesystem::path game_folder_path = m_games[itemID].path;
if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
game_folder_path = Common::FS::PathFromQString(game_update_path);
// A separately installed update lives in a sibling folder named
// "<game folder>-UPDATE"; the hyphen is part of the suffix.
std::filesystem::path game_update_path = game_folder_path;
game_update_path += "-UPDATE";
if (std::filesystem::exists(game_update_path)) {
game_folder_path = game_update_path;
}
if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) {
int rows = psf.GetEntries().size();
@ -320,21 +320,17 @@ public:
bool error = false;
QString folder_path, game_update_path, dlc_path;
Common::FS::PathToQString(folder_path, m_games[itemID].path);
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
game_update_path = folder_path + "-UPDATE";
Common::FS::PathToQString(
dlc_path, Config::getAddonInstallDir() /
Common::FS::PathFromQString(folder_path).parent_path().filename());
QString message_type = tr("Game");
if (selected == deleteUpdate) {
if (!Config::getSeparateUpdateEnabled()) {
QMessageBox::critical(nullptr, tr("Error"),
QString(tr("requiresEnableSeparateUpdateFolder_MSG")));
error = true;
} else if (!std::filesystem::exists(
Common::FS::PathFromQString(game_update_path))) {
QMessageBox::critical(nullptr, tr("Error"),
QString(tr("This game has no update to delete!")));
if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
QMessageBox::critical(
nullptr, tr("Error"),
QString(tr("This game has no separate update to delete!")));
error = true;
} else {
folder_path = game_update_path;

View File

@ -35,7 +35,7 @@ MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWi
MainWindow::~MainWindow() {
SaveWindowState();
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::save(config_dir / "config.toml");
Config::saveMainWindow(config_dir / "config.toml");
}
bool MainWindow::Init() {
@ -111,6 +111,7 @@ void MainWindow::CreateActions() {
m_theme_act_group->addAction(ui->setThemeGreen);
m_theme_act_group->addAction(ui->setThemeBlue);
m_theme_act_group->addAction(ui->setThemeViolet);
m_theme_act_group->addAction(ui->setThemeGruvbox);
}
void MainWindow::AddUiWidgets() {
@ -542,6 +543,14 @@ void MainWindow::CreateConnects() {
isIconBlack = false;
}
});
connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() {
m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar);
Config::setMainWindowTheme(static_cast<int>(Theme::Gruvbox));
if (isIconBlack) {
SetUiIcons(false);
isIconBlack = false;
}
});
}
void MainWindow::StartGame() {
@ -915,6 +924,11 @@ void MainWindow::SetLastUsedTheme() {
isIconBlack = false;
SetUiIcons(false);
break;
case Theme::Gruvbox:
ui->setThemeGruvbox->setChecked(true);
isIconBlack = false;
SetUiIcons(false);
break;
}
}
@ -1008,7 +1022,7 @@ void MainWindow::AddRecentFiles(QString filePath) {
}
Config::setRecentFiles(vec);
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::save(config_dir / "config.toml");
Config::saveMainWindow(config_dir / "config.toml");
CreateRecentGameActions(); // Refresh the QActions.
}
@ -1079,4 +1093,4 @@ bool MainWindow::eventFilter(QObject* obj, QEvent* event) {
}
}
return QMainWindow::eventFilter(obj, event);
}
}

View File

@ -8,14 +8,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
switch (theme) {
case Theme::Dark:
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
"color: #ffffff;" // White text
"border: 2px solid #ffffff;" // White border
"padding: 5px;");
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #2A82DA; }");
themePalette.setColor(QPalette::Window, QColor(50, 50, 50));
themePalette.setColor(QPalette::WindowText, Qt::white);
themePalette.setColor(QPalette::Base, QColor(20, 20, 20));
themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25));
themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53));
themePalette.setColor(QPalette::ToolTipBase, Qt::white);
themePalette.setColor(QPalette::ToolTipText, Qt::white);
@ -28,12 +29,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
themePalette.setColor(QPalette::HighlightedText, Qt::black);
qApp->setPalette(themePalette);
break;
case Theme::Light:
mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background
"color: #000000;" // Black text
"border: 2px solid #000000;" // Black border
"padding: 5px;");
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #ffffff; color: #000000; border: 1px solid #000000; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #2A82DA; }");
themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray
themePalette.setColor(QPalette::WindowText, Qt::black); // Black
themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish
@ -48,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
themePalette.setColor(QPalette::HighlightedText, Qt::white); // White
qApp->setPalette(themePalette);
break;
case Theme::Green:
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
"color: #ffffff;" // White text
"border: 2px solid #ffffff;" // White border
"padding: 5px;");
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #192819; color: #ffffff; border: 1px solid #ffffff; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #2A82DA; }");
themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green background
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base
@ -68,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
qApp->setPalette(themePalette);
break;
case Theme::Blue:
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
"color: #ffffff;" // White text
"border: 2px solid #ffffff;" // White border
"padding: 5px;");
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #2A82DA; }");
themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base
@ -94,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
qApp->setPalette(themePalette);
break;
case Theme::Violet:
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
"color: #ffffff;" // White text
"border: 2px solid #ffffff;" // White border
"padding: 5px;");
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #2A82DA; }");
themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base
@ -115,6 +119,28 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
qApp->setPalette(themePalette);
break;
case Theme::Gruvbox:
mw_searchbar->setStyleSheet(
"QLineEdit {"
"background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; "
"border-radius: 4px; padding: 5px; }"
"QLineEdit:focus {"
"border: 1px solid #83A598; }");
themePalette.setColor(QPalette::Window, QColor(29, 32, 33));
themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215));
themePalette.setColor(QPalette::Base, QColor(29, 32, 33));
themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47));
themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215));
themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215));
themePalette.setColor(QPalette::Text, QColor(249, 245, 215));
themePalette.setColor(QPalette::Button, QColor(40, 40, 40));
themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215));
themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52));
themePalette.setColor(QPalette::Link, QColor(131, 165, 152));
themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152));
themePalette.setColor(QPalette::HighlightedText, Qt::black);
qApp->setPalette(themePalette);
break;
}

View File

@ -7,13 +7,7 @@
#include <QLineEdit>
#include <QWidget>
enum class Theme : int {
Dark,
Light,
Green,
Blue,
Violet,
};
enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox };
class WindowThemes : public QObject {
Q_OBJECT

View File

@ -36,6 +36,7 @@ public:
QAction* setThemeGreen;
QAction* setThemeBlue;
QAction* setThemeViolet;
QAction* setThemeGruvbox;
QWidget* centralWidget;
QLineEdit* mw_searchbar;
QPushButton* playButton;
@ -158,6 +159,9 @@ public:
setThemeViolet = new QAction(MainWindow);
setThemeViolet->setObjectName("setThemeViolet");
setThemeViolet->setCheckable(true);
setThemeGruvbox = new QAction(MainWindow);
setThemeGruvbox->setObjectName("setThemeGruvbox");
setThemeGruvbox->setCheckable(true);
centralWidget = new QWidget(MainWindow);
centralWidget->setObjectName("centralWidget");
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth());
@ -282,6 +286,7 @@ public:
menuThemes->addAction(setThemeGreen);
menuThemes->addAction(setThemeBlue);
menuThemes->addAction(setThemeViolet);
menuThemes->addAction(setThemeGruvbox);
menuGame_List_Icons->addAction(setIconSizeTinyAct);
menuGame_List_Icons->addAction(setIconSizeSmallAct);
menuGame_List_Icons->addAction(setIconSizeMediumAct);
@ -368,6 +373,7 @@ public:
setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr));
setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr));
setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr));
setThemeGruvbox->setText("Gruvbox");
toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr));
} // retranslateUi
};

View File

@ -12,12 +12,13 @@
#ifdef ENABLE_UPDATER
#include "check_update.h"
#endif
#include <toml.hpp>
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/formatter.h"
#include "main_window.h"
#include "settings_dialog.h"
#include "ui_settings_dialog.h"
QStringList languageNames = {"Arabic",
"Czech",
"Danish",
@ -94,13 +95,18 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
connect(ui->buttonBox, &QDialogButtonBox::clicked, this,
[this, config_dir](QAbstractButton* button) {
if (button == ui->buttonBox->button(QDialogButtonBox::Save)) {
UpdateSettings();
Config::save(config_dir / "config.toml");
QWidget::close();
} else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) {
UpdateSettings();
Config::save(config_dir / "config.toml");
} else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) {
Config::setDefaultValues();
Config::save(config_dir / "config.toml");
LoadValuesFromConfig();
} else if (button == ui->buttonBox->button(QDialogButtonBox::Close)) {
ResetInstallFolders();
}
if (Common::Log::IsActive()) {
Common::Log::Filter filter;
@ -119,35 +125,6 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
// GENERAL TAB
{
connect(ui->userNameLineEdit, &QLineEdit::textChanged, this,
[](const QString& text) { Config::setUserName(text.toStdString()); });
connect(ui->consoleLanguageComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged),
this, [](int index) {
if (index >= 0 && index < languageIndexes.size()) {
int languageCode = languageIndexes[index];
Config::setLanguage(languageCode);
}
});
connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setFullscreenMode(val); });
connect(ui->separateUpdatesCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setSeparateUpdateEnabled(val); });
connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setShowSplash(val); });
connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setNeoMode(val); });
connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this,
[](const QString& text) { Config::setLogType(text.toStdString()); });
connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this,
[](const QString& text) { Config::setLogFilter(text.toStdString()); });
#ifdef ENABLE_UPDATER
connect(ui->updateCheckBox, &QCheckBox::stateChanged, this,
[](int state) { Config::setAutoUpdate(state == Qt::Checked); });
@ -163,74 +140,12 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
ui->updaterGroupBox->setVisible(false);
ui->GUIgroupBox->setMaximumSize(265, 16777215);
#endif
connect(ui->playBGMCheckBox, &QCheckBox::stateChanged, this, [](int val) {
Config::setPlayBGM(val);
if (val == Qt::Unchecked) {
BackgroundMusicPlayer::getInstance().stopMusic();
}
});
connect(ui->BGMVolumeSlider, &QSlider::valueChanged, this, [](float val) {
Config::setBGMvolume(val);
BackgroundMusicPlayer::getInstance().setVolume(val);
});
#ifdef ENABLE_DISCORD_RPC
connect(ui->discordRPCCheckbox, &QCheckBox::stateChanged, this, [](int val) {
Config::setEnableDiscordRPC(val);
auto* rpc = Common::Singleton<DiscordRPCHandler::RPC>::Instance();
if (val == Qt::Checked) {
rpc->init();
rpc->setStatusIdling();
} else {
rpc->shutdown();
}
});
#endif
}
// Input TAB
{
connect(ui->hideCursorComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
[this](s16 index) {
Config::setCursorState(index);
OnCursorStateChanged(index);
});
connect(ui->idleTimeoutSpinBox, &QSpinBox::valueChanged, this,
[](int index) { Config::setCursorHideTimeout(index); });
connect(ui->backButtonBehaviorComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged),
this, [this](int index) {
if (index >= 0 && index < ui->backButtonBehaviorComboBox->count()) {
QString data = ui->backButtonBehaviorComboBox->itemData(index).toString();
Config::setBackButtonBehavior(data.toStdString());
}
});
}
// GPU TAB
{
// First options is auto selection -1, so gpuId on the GUI will always have to subtract 1
// when setting and add 1 when getting to select the correct gpu in Qt
connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this,
[](int index) { Config::setGpuId(index - 1); });
connect(ui->widthSpinBox, &QSpinBox::valueChanged, this,
[](int val) { Config::setScreenWidth(val); });
connect(ui->heightSpinBox, &QSpinBox::valueChanged, this,
[](int val) { Config::setScreenHeight(val); });
connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this,
[](int val) { Config::setVblankDiv(val); });
connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setDumpShaders(val); });
connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setNullGpu(val); });
[this](s16 index) { OnCursorStateChanged(index); });
}
// PATH TAB
@ -262,21 +177,6 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
});
}
// DEBUG TAB
{
connect(ui->debugDump, &QCheckBox::stateChanged, this,
[](int val) { Config::setDebugDump(val); });
connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setVkValidation(val); });
connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setVkSyncValidation(val); });
connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this,
[](int val) { Config::setRdocEnabled(val); });
}
// Descriptions
{
// General
@ -323,40 +223,69 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
}
void SettingsDialog::LoadValuesFromConfig() {
ui->consoleLanguageComboBox->setCurrentIndex(
std::distance(
languageIndexes.begin(),
std::find(languageIndexes.begin(), languageIndexes.end(), Config::GetLanguage())) %
languageIndexes.size());
ui->emulatorLanguageComboBox->setCurrentIndex(languages[Config::getEmulatorLanguage()]);
ui->hideCursorComboBox->setCurrentIndex(Config::getCursorState());
OnCursorStateChanged(Config::getCursorState());
ui->idleTimeoutSpinBox->setValue(Config::getCursorHideTimeout());
ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1);
ui->widthSpinBox->setValue(Config::getScreenWidth());
ui->heightSpinBox->setValue(Config::getScreenHeight());
ui->vblankSpinBox->setValue(Config::vblankDiv());
ui->dumpShadersCheckBox->setChecked(Config::dumpShaders());
ui->nullGpuCheckBox->setChecked(Config::nullGpu());
ui->playBGMCheckBox->setChecked(Config::getPlayBGM());
ui->BGMVolumeSlider->setValue((Config::getBGMvolume()));
ui->discordRPCCheckbox->setChecked(Config::getEnableDiscordRPC());
ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode());
ui->separateUpdatesCheckBox->setChecked(Config::getSeparateUpdateEnabled());
ui->showSplashCheckBox->setChecked(Config::showSplash());
ui->ps4proCheckBox->setChecked(Config::isNeoMode());
ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType()));
ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter()));
ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName()));
ui->debugDump->setChecked(Config::debugDump());
ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled());
ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled());
ui->rdocCheckBox->setChecked(Config::isRdocEnabled());
std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
std::error_code error;
if (!std::filesystem::exists(userdir / "config.toml", error)) {
Config::load(userdir / "config.toml");
return;
}
try {
std::ifstream ifs;
ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
const toml::value data = toml::parse(userdir / "config.toml");
} catch (std::exception& ex) {
fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what());
return;
}
const toml::value data = toml::parse(userdir / "config.toml");
const QVector<int> languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, 9,
15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 30, 28};
ui->consoleLanguageComboBox->setCurrentIndex(
std::distance(languageIndexes.begin(),
std::find(languageIndexes.begin(), languageIndexes.end(),
toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) %
languageIndexes.size());
ui->emulatorLanguageComboBox->setCurrentIndex(
languages[toml::find_or<std::string>(data, "GUI", "emulatorLanguage", "en")]);
ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1));
OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1));
ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5));
// First options is auto selection -1, so gpuId on the GUI will always have to subtract 1
// when setting and add 1 when getting to select the correct gpu in Qt
ui->graphicsAdapterBox->setCurrentIndex(toml::find_or<int>(data, "Vulkan", "gpuId", -1) + 1);
ui->widthSpinBox->setValue(toml::find_or<int>(data, "GPU", "screenWidth", 1280));
ui->heightSpinBox->setValue(toml::find_or<int>(data, "GPU", "screenHeight", 720));
ui->vblankSpinBox->setValue(toml::find_or<int>(data, "GPU", "vblankDivider", 1));
ui->dumpShadersCheckBox->setChecked(toml::find_or<bool>(data, "GPU", "dumpShaders", false));
ui->nullGpuCheckBox->setChecked(toml::find_or<bool>(data, "GPU", "nullGpu", false));
ui->playBGMCheckBox->setChecked(toml::find_or<bool>(data, "General", "playBGM", false));
ui->BGMVolumeSlider->setValue(toml::find_or<int>(data, "General", "BGMvolume", 50));
ui->discordRPCCheckbox->setChecked(
toml::find_or<bool>(data, "General", "enableDiscordRPC", true));
ui->fullscreenCheckBox->setChecked(toml::find_or<bool>(data, "General", "Fullscreen", false));
ui->separateUpdatesCheckBox->setChecked(
toml::find_or<bool>(data, "General", "separateUpdateEnabled", false));
ui->showSplashCheckBox->setChecked(toml::find_or<bool>(data, "General", "showSplash", false));
ui->ps4proCheckBox->setChecked(toml::find_or<bool>(data, "General", "isPS4Pro", false));
ui->logTypeComboBox->setCurrentText(
QString::fromStdString(toml::find_or<std::string>(data, "General", "logType", "async")));
ui->logFilterLineEdit->setText(
QString::fromStdString(toml::find_or<std::string>(data, "General", "logFilter", "")));
ui->userNameLineEdit->setText(
QString::fromStdString(toml::find_or<std::string>(data, "General", "userName", "shadPS4")));
ui->debugDump->setChecked(toml::find_or<bool>(data, "Debug", "DebugDump", false));
ui->vkValidationCheckBox->setChecked(toml::find_or<bool>(data, "Vulkan", "validation", false));
ui->vkSyncValidationCheckBox->setChecked(
toml::find_or<bool>(data, "Vulkan", "validation_sync", false));
ui->rdocCheckBox->setChecked(toml::find_or<bool>(data, "Vulkan", "rdocEnable", false));
#ifdef ENABLE_UPDATER
ui->updateCheckBox->setChecked(Config::autoUpdate());
std::string updateChannel = Config::getUpdateChannel();
ui->updateCheckBox->setChecked(toml::find_or<bool>(data, "General", "autoUpdate", false));
std::string updateChannel = toml::find_or<std::string>(data, "General", "updateChannel", "");
if (updateChannel != "Release" && updateChannel != "Nightly") {
if (Common::isRelease) {
updateChannel = "Release";
@ -367,18 +296,13 @@ void SettingsDialog::LoadValuesFromConfig() {
ui->updateComboBox->setCurrentText(QString::fromStdString(updateChannel));
#endif
for (const auto& dir : Config::getGameInstallDirs()) {
QString path_string;
Common::FS::PathToQString(path_string, dir);
QListWidgetItem* item = new QListWidgetItem(path_string);
ui->gameFoldersListWidget->addItem(item);
}
QString backButtonBehavior = QString::fromStdString(Config::getBackButtonBehavior());
QString backButtonBehavior = QString::fromStdString(
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
ResetInstallFolders();
}
void SettingsDialog::InitializeEmulatorLanguages() {
@ -554,3 +478,75 @@ bool SettingsDialog::eventFilter(QObject* obj, QEvent* event) {
}
return QDialog::eventFilter(obj, event);
}
void SettingsDialog::UpdateSettings() {
const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"};
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
Config::setNeoMode(ui->ps4proCheckBox->isChecked());
Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked());
Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
Config::setNeoMode(ui->ps4proCheckBox->isChecked());
Config::setLogType(ui->logTypeComboBox->currentText().toStdString());
Config::setLogFilter(ui->logFilterLineEdit->text().toStdString());
Config::setUserName(ui->userNameLineEdit->text().toStdString());
Config::setCursorState(ui->hideCursorComboBox->currentIndex());
Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value());
Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1);
Config::setBGMvolume(ui->BGMVolumeSlider->value());
Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]);
Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked());
Config::setScreenWidth(ui->widthSpinBox->value());
Config::setScreenHeight(ui->heightSpinBox->value());
Config::setVblankDiv(ui->vblankSpinBox->value());
Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked());
Config::setNullGpu(ui->nullGpuCheckBox->isChecked());
Config::setSeparateUpdateEnabled(ui->separateUpdatesCheckBox->isChecked());
Config::setShowSplash(ui->showSplashCheckBox->isChecked());
Config::setDebugDump(ui->debugDump->isChecked());
Config::setVkValidation(ui->vkValidationCheckBox->isChecked());
Config::setVkSyncValidation(ui->vkSyncValidationCheckBox->isChecked());
Config::setRdocEnabled(ui->rdocCheckBox->isChecked());
Config::setAutoUpdate(ui->updateCheckBox->isChecked());
Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString());
#ifdef ENABLE_DISCORD_RPC
auto* rpc = Common::Singleton<DiscordRPCHandler::RPC>::Instance();
if (Config::getEnableDiscordRPC()) {
rpc->init();
rpc->setStatusIdling();
} else {
rpc->shutdown();
}
#endif
BackgroundMusicPlayer::getInstance().setVolume(ui->BGMVolumeSlider->value());
}
void SettingsDialog::ResetInstallFolders() {
std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
const toml::value data = toml::parse(userdir / "config.toml");
if (data.contains("GUI")) {
const toml::value& gui = data.at("GUI");
const auto install_dir_array =
toml::find_or<std::vector<std::string>>(gui, "installDirs", {});
std::vector<std::filesystem::path> settings_install_dirs_config = {};
for (const auto& dir : install_dir_array) {
if (std::find(settings_install_dirs_config.begin(), settings_install_dirs_config.end(),
dir) == settings_install_dirs_config.end()) {
settings_install_dirs_config.push_back(dir);
}
}
for (const auto& dir : settings_install_dirs_config) {
QString path_string;
Common::FS::PathToQString(path_string, dir);
QListWidgetItem* item = new QListWidgetItem(path_string);
ui->gameFoldersListWidget->addItem(item);
}
Config::setGameInstallDirs(settings_install_dirs_config);
}
}

View File

@ -31,6 +31,8 @@ signals:
private:
void LoadValuesFromConfig();
void UpdateSettings();
void ResetInstallFolders();
void InitializeEmulatorLanguages();
void OnLanguageChanged(int index);
void OnCursorStateChanged(s16 index);

View File

@ -1159,7 +1159,7 @@
<message>
<location filename="../settings_dialog.cpp" line="293"/>
<source>separateUpdatesCheckBox</source>
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.</translation>
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID.</translation>
</message>
<message>
<location filename="../settings_dialog.cpp" line="295"/>

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <SDL3/SDL_events.h>
#include <SDL3/SDL_hints.h>
#include <SDL3/SDL_init.h>
#include <SDL3/SDL_properties.h>
#include <SDL3/SDL_timer.h>
@ -68,6 +69,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_,
std::string_view window_title)
: width{width_}, height{height_}, controller{controller_} {
if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) {
UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError());
}
if (!SDL_Init(SDL_INIT_VIDEO)) {
UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError());
}

View File

@ -1,6 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <type_traits>
#include <utility>
@ -13,6 +12,7 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
namespace Shader::Backend::SPIRV {
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
return arg.VectorReg();
} else if constexpr (std::is_same_v<ArgType, const char*>) {
return arg.StringLiteral();
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
return arg.Patch();
}
UNREACHABLE();
}
template <auto func, bool is_first_arg_inst, size_t... I>
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
return main;
}
// Translates an AmdGpu::TessellationType into the matching SPIR-V execution mode
// (Isolines, Triangles or Quads). Any other value hits UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
    using AmdGpu::TessellationType;
    if (primitive == TessellationType::Isoline) {
        return spv::ExecutionMode::Isolines;
    }
    if (primitive == TessellationType::Triangle) {
        return spv::ExecutionMode::Triangles;
    }
    if (primitive == TessellationType::Quad) {
        return spv::ExecutionMode::Quads;
    }
    UNREACHABLE_MSG("Tessellation primitive {}", primitive);
}
// Translates an AmdGpu::TessellationPartitioning into the matching SPIR-V spacing
// execution mode. Values other than Integer, FracOdd and FracEven hit UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
    using AmdGpu::TessellationPartitioning;
    if (spacing == TessellationPartitioning::Integer) {
        return spv::ExecutionMode::SpacingEqual;
    }
    if (spacing == TessellationPartitioning::FracOdd) {
        return spv::ExecutionMode::SpacingFractionalOdd;
    }
    if (spacing == TessellationPartitioning::FracEven) {
        return spv::ExecutionMode::SpacingFractionalEven;
    }
    UNREACHABLE_MSG("Tessellation spacing {}", spacing);
}
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
@ -222,6 +251,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
if (profile.supports_image_load_store_lod) {
ctx.AddExtension("SPV_AMD_shader_image_load_store_lod");
ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
}
}
if (info.has_texel_buffers) {
ctx.AddCapability(spv::Capability::SampledBuffer);
@ -244,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.uses_group_ballot) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
}
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
const auto stage = info.l_stage;
if (stage == LogicalStage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if (info.stage == Stage::Geometry) {
if (stage == LogicalStage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry);
}
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.info.stage) {
case Stage::Compute: {
switch (info.l_stage) {
case LogicalStage::Compute: {
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
execution_model = spv::ExecutionModel::GLCompute;
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
workgroup_size[1], workgroup_size[2]);
break;
}
case Stage::Export:
case Stage::Vertex:
case LogicalStage::Vertex:
execution_model = spv::ExecutionModel::Vertex;
break;
case Stage::Fragment:
case LogicalStage::TessellationControl:
execution_model = spv::ExecutionModel::TessellationControl;
ctx.AddCapability(spv::Capability::Tessellation);
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.hs_info.NumOutputControlPoints());
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
ctx.AddExecutionMode(main,
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
? spv::ExecutionMode::VertexOrderCcw
: spv::ExecutionMode::VertexOrderCw);
break;
}
case LogicalStage::Fragment:
execution_model = spv::ExecutionModel::Fragment;
if (ctx.profile.lower_left_origin_mode) {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
@ -288,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
execution_model = spv::ExecutionModel::Geometry;
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
ctx.AddExecutionMode(main,
@ -299,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.runtime_info.gs_info.num_invocations);
break;
default:
throw NotImplementedException("Stage {}", u32(program.info.stage));
UNREACHABLE_MSG("Stage {}", u32(info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -345,7 +397,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
const IR::Program& program, Bindings& binding) {
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
DefineEntryPoint(program.info, ctx, main);
SetupCapabilities(program.info, profile, ctx);
SetupFloatMode(ctx, profile, runtime_info, main);
PatchPhiNodes(program, ctx);

View File

@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
void EmitBarrier(EmitContext& ctx) {
const auto execution{spv::Scope::Workgroup};
const auto memory{spv::Scope::Workgroup};
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
spv::MemorySemanticsMask::WorkgroupMemory};
spv::Scope memory;
spv::MemorySemanticsMask memory_semantics;
if (ctx.l_stage == Shader::LogicalStage::TessellationControl) {
memory = spv::Scope::Invocation;
memory_semantics = spv::MemorySemanticsMask::MaskNone;
} else {
memory = spv::Scope::Workgroup;
memory_semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory;
}
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
ctx.ConstU32(static_cast<u32>(memory)),
ctx.ConstU32(static_cast<u32>(memory_semantics)));

View File

@ -4,6 +4,9 @@
#include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/runtime_info.h"
#include <magic_enum/magic_enum.hpp>
@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
return info.id;
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
ctx.ConstU32(element));
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
const auto& info{ctx.output_params.at(attr_index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
return info.id;
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
}
}
}
if (IR::IsMrt(attr)) {
@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
return {ctx.F32[1], false};
} else {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
}
}
if (IR::IsMrt(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.stage == Stage::Geometry) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (ctx.info.l_stage == LogicalStage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
} else if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
if (IR::IsParam(attr)) {
@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
}
return coord;
}
case IR::Attribute::TessellationEvaluationPointU:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
case IR::Attribute::TessellationEvaluationPointV:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
default:
throw NotImplementedException("Read attribute {}", attr);
UNREACHABLE_MSG("Read attribute {}", attr);
}
}
@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
ASSERT(ctx.info.stage == Stage::Geometry);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InvocationId:
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
case IR::Attribute::PatchVertices:
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
case IR::Attribute::PackedHullInvocationInfo: {
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
// [0:8]: patch id within VGT
// [8:12]: output control point id
// But 0:8 should be treated as 0 for attribute addressing purposes
if (ctx.runtime_info.hs_info.IsPassthrough()) {
// Gcn shader would run with 1 thread, but we need to run a thread for
// each output control point.
// If Gcn shader uses this value, we should make sure all threads in the
// Vulkan shader use 0
return ctx.ConstU32(0u);
} else {
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
}
}
default:
throw NotImplementedException("Read U32 attribute {}", attr);
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}
}
@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
}
}
// Loads one F32 component from ctx.input_attr_array, addressed by the runtime ids
// (vertex_index, attr_index, comp_index).
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
    const Id component_ptr_type{ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1])};
    const Id address{ctx.OpAccessChain(component_ptr_type, ctx.input_attr_array, vertex_index,
                                       attr_index, comp_index)};
    return ctx.OpLoad(ctx.F32[1], address);
}
// Stores `value` into ctx.output_attr_array at (invocation id, attr_index, comp_index).
// The vertex index is not passed in: it is always the current invocation id.
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
    const Id component_ptr_type{ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1])};
    const Id vertex_index{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
    const Id address{ctx.OpAccessChain(component_ptr_type, ctx.output_attr_array, vertex_index,
                                       attr_index, comp_index)};
    ctx.OpStore(address, value);
}
// Loads one F32 component of the generic patch variable selected by `patch`.
// The access chain uses ctx.output_f32 as its pointer type in the tessellation control
// stage and ctx.input_f32 in every other stage.
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
    const u32 patch_index{IR::GenericPatchIndex(patch)};
    const Id component{ctx.ConstU32(IR::GenericPatchElement(patch))};
    const bool is_tess_control{ctx.l_stage == LogicalStage::TessellationControl};
    const Id pointer_type{is_tess_control ? ctx.output_f32 : ctx.input_f32};
    const Id address{ctx.OpAccessChain(pointer_type, ctx.patches.at(patch_index), component)};
    return ctx.OpLoad(ctx.F32[1], address);
}
// Stores `value` into the output location selected by `patch`:
//  - generic patches go to the matching component of ctx.patches,
//  - TessellationLodLeft..Bottom go to ctx.output_tess_level_outer (index = offset from Left),
//  - TessellationLodInteriorU/V go to elements 0/1 of ctx.output_tess_level_inner.
// Any other patch value hits UNREACHABLE_MSG.
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
    const auto store_to = [&](Id address) { ctx.OpStore(address, value); };

    if (IR::IsGeneric(patch)) {
        const u32 patch_index{IR::GenericPatchIndex(patch)};
        const Id component{ctx.ConstU32(IR::GenericPatchElement(patch))};
        store_to(ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(patch_index), component));
        return;
    }

    switch (patch) {
    case IR::Patch::TessellationLodLeft:
    case IR::Patch::TessellationLodRight:
    case IR::Patch::TessellationLodTop:
    case IR::Patch::TessellationLodBottom: {
        const u32 outer_index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
        const Id outer_index_id{ctx.ConstU32(outer_index)};
        store_to(ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, outer_index_id));
        return;
    }
    case IR::Patch::TessellationLodInteriorU:
        store_to(
            ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.u32_zero_value));
        return;
    case IR::Patch::TessellationLodInteriorV:
        store_to(ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)));
        return;
    default:
        UNREACHABLE_MSG("Patch {}", u32(patch));
    }
}
template <u32 N>
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];

View File

@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
}
// Emits an F32 OpFDiv of a / b and passes the result through Decorate for `inst`.
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id quotient{ctx.OpFDiv(ctx.F32[1], a, b)};
    return Decorate(ctx, inst, quotient);
}
// Emits an F64 OpFDiv of a / b and passes the result through Decorate for `inst`.
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id quotient{ctx.OpFDiv(ctx.F64[1], a, b)};
    return Decorate(ctx, inst, quotient);
}
// Emits an F16 OpFNegate of `value`.
Id EmitFPNeg16(EmitContext& ctx, Id value) {
    const Id negated{ctx.OpFNegate(ctx.F16[1], value)};
    return negated;
}
@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
return ctx.OpTrunc(ctx.F64[1], value);
}
Id EmitFPFract(EmitContext& ctx, Id value) {
Id EmitFPFract32(EmitContext& ctx, Id value) {
return ctx.OpFract(ctx.F32[1], value);
}
// Emits an F64 OpFract of `value`.
Id EmitFPFract64(EmitContext& ctx, Id value) {
    const Id fractional{ctx.OpFract(ctx.F64[1], value)};
    return fractional;
}
// Emits OpFrexpStruct on an F32 `value` and returns member 0 of the result struct as F32
// (the significand part, going by the function name).
Id EmitFPFrexpSig32(EmitContext& ctx, Id value) {
    const Id result_struct{ctx.OpFrexpStruct(ctx.frexp_result_f32, value)};
    return ctx.OpCompositeExtract(ctx.F32[1], result_struct, 0);
}
// Emits OpFrexpStruct on an F64 `value` and returns member 0 of the result struct as F64
// (the significand part, going by the function name).
Id EmitFPFrexpSig64(EmitContext& ctx, Id value) {
    const Id result_struct{ctx.OpFrexpStruct(ctx.frexp_result_f64, value)};
    return ctx.OpCompositeExtract(ctx.F64[1], result_struct, 0);
}
// Emits OpFrexpStruct on an F32 `value` and returns member 1 of the result struct as U32
// (the exponent part, going by the function name).
Id EmitFPFrexpExp32(EmitContext& ctx, Id value) {
    const Id result_struct{ctx.OpFrexpStruct(ctx.frexp_result_f32, value)};
    return ctx.OpCompositeExtract(ctx.U32[1], result_struct, 1);
}
// Emits OpFrexpStruct on an F64 `value` and returns member 1 of the result struct as U32
// (the exponent part, going by the function name).
Id EmitFPFrexpExp64(EmitContext& ctx, Id value) {
    const Id result_struct{ctx.OpFrexpStruct(ctx.frexp_result_f64, value)};
    return ctx.OpCompositeExtract(ctx.U32[1], result_struct, 1);
}
// Emits OpFOrdEqual on lhs and rhs, producing a U1 result.
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
    const Id is_equal{ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs)};
    return is_equal;
}

View File

@ -130,8 +130,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod);
operands.AddOffset(ctx, offset);
const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
operands.mask, operands.operands);
const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels;
}
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms) {
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
const IR::Value& offset, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id result_type = texture.data_types->Get(4);
@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample;
}
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4);
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
ImageOperands operands;
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
LOG_WARNING(Render, "Image write with LOD not supported by driver");
}
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask,
operands.operands);
}
} // namespace Shader::Backend::SPIRV

View File

@ -9,6 +9,7 @@
namespace Shader::IR {
enum class Attribute : u64;
enum class ScalarReg : u32;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR
@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);
@ -189,6 +192,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPNeg16(EmitContext& ctx, Id value);
Id EmitFPNeg32(EmitContext& ctx, Id value);
Id EmitFPNeg64(EmitContext& ctx, Id value);
@ -220,7 +225,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
Id EmitFPTrunc16(EmitContext& ctx, Id value);
Id EmitFPTrunc32(EmitContext& ctx, Id value);
Id EmitFPTrunc64(EmitContext& ctx, Id value);
Id EmitFPFract(EmitContext& ctx, Id value);
Id EmitFPFract32(EmitContext& ctx, Id value);
Id EmitFPFract64(EmitContext& ctx, Id value);
Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
@ -385,14 +395,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
Id lod, Id ms);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
const IR::Value& offset, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color);
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);

View File

@ -6,6 +6,7 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
#include <boost/container/static_vector.hpp>
@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) {
case Stage::Compute:
return "cs";
}
throw InvalidArgument("Invalid stage {}", u32(stage));
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
}
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
const Info& info_, Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, binding{binding_} {
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@ -147,6 +148,10 @@ void EmitContext::DefineArithmeticTypes() {
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32");
if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64");
}
}
void EmitContext::DefineInterfaces() {
@ -264,9 +269,8 @@ void EmitContext::DefineInputs() {
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
@ -290,7 +294,7 @@ void EmitContext::DefineInputs() {
});
// Note that we pass index rather than Id
input_params[attrib.semantic] = SpirvAttribute{
.id = rate_idx,
.id = {rate_idx},
.pointer_type = input_u32,
.component_type = U32[1],
.num_components = std::min<u16>(attrib.num_elements, num_components),
@ -307,12 +311,11 @@ void EmitContext::DefineInputs() {
}
input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
case LogicalStage::Fragment:
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@ -347,15 +350,14 @@ void EmitContext::DefineInputs() {
}
input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
interfaces.push_back(attr_id);
}
break;
case Stage::Compute:
case LogicalStage::Compute:
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
local_invocation_id =
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const auto gl_per_vertex =
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
@ -379,9 +381,50 @@ void EmitContext::DefineInputs() {
for (int param_id = 0; param_id < num_params; ++param_id) {
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
const Id id{DefineInput(type, param_id)};
Name(id, fmt::format("in_attr{}", param_id));
Name(id, fmt::format("gs_in_attr{}", param_id));
input_params[param_id] = {id, input_f32, F32[1], 4};
interfaces.push_back(id);
}
break;
}
case LogicalStage::TessellationControl: {
invocation_id =
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
patch_vertices =
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
}
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
}
u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineInput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_in{}", index));
patches[index] = id;
}
break;
}
@ -391,9 +434,81 @@ void EmitContext::DefineInputs() {
}
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
// Might cause problems linking with tcs
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4;
if (num_attrs > 0) {
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
output_attr_array = DefineOutput(type, 0);
Name(output_attr_array, "out_attrs");
}
} else {
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
}
}
break;
}
case LogicalStage::TessellationControl: {
if (info.stores_tess_level_outer) {
const Id type{TypeArray(F32[1], ConstU32(4U))};
output_tess_level_outer =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
Decorate(output_tess_level_outer, spv::Decoration::Patch);
}
if (info.stores_tess_level_inner) {
const Id type{TypeArray(F32[1], ConstU32(2U))};
output_tess_level_inner =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}
const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The output array is sized by the number of output control points from the runtime info.
const Id patch_array_type{TypeArray(
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
output_attr_array = DefineOutput(patch_array_type, 0);
Name(output_attr_array, "out_attrs");
}
u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineOutput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_out{}", index));
patches[index] = id;
}
break;
}
case LogicalStage::TessellationEval: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
@ -415,11 +530,10 @@ void EmitContext::DefineOutputs() {
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
interfaces.push_back(id);
}
break;
}
case Stage::Fragment:
case LogicalStage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@ -431,22 +545,22 @@ void EmitContext::DefineOutputs() {
const Id id{DefineOutput(type, i)};
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
interfaces.push_back(id);
}
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u};
interfaces.push_back(id);
}
break;
}
default:
case LogicalStage::Compute:
break;
default:
UNREACHABLE();
}
}
@ -582,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::R32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) {
return spv::ImageFormat::R32i;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R32f;

View File

@ -46,14 +46,18 @@ public:
void DefineBufferOffsets();
void DefineInterpolatedAttribs();
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
Decorate(input_id, spv::Decoration::Location, location);
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
if (location) {
Decorate(input_id, spv::Decoration::Location, *location);
}
return input_id;
}
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
if (location) {
Decorate(output_id, spv::Decoration::Location, *location);
}
@ -131,7 +135,8 @@ public:
const Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage{};
Stage stage;
LogicalStage l_stage{};
Id void_id{};
Id U8{};
@ -148,6 +153,8 @@ public:
Id full_result_i32x2;
Id full_result_u32x2;
Id frexp_result_f32;
Id frexp_result_f64;
Id pi_x2;
@ -186,8 +193,15 @@ public:
Id clip_distances{};
Id cull_distances{};
Id patch_vertices{};
Id output_tess_level_outer{};
Id output_tess_level_inner{};
Id tess_coord;
std::array<Id, 30> patches{};
Id workgroup_id{};
Id local_invocation_id{};
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
Id subgroup_local_invocation_id{};
Id image_u32{};
@ -250,6 +264,8 @@ public:
bool is_loaded{};
s32 buffer_handle{-1};
};
Id input_attr_array;
Id output_attr_array;
std::array<SpirvAttribute, IR::NumParams> input_params{};
std::array<SpirvAttribute, IR::NumParams> output_params{};
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};

View File

@ -80,6 +80,8 @@ void CFG::EmitLabels() {
if (inst.IsUnconditionalBranch()) {
const u32 target = inst.BranchTarget(pc);
AddLabel(target);
// Emit this label so that the block ends with s_branch instruction
AddLabel(pc + inst.length);
} else if (inst.IsConditionalBranch()) {
const u32 true_label = inst.BranchTarget(pc);
const u32 false_label = pc + inst.length;

View File

@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Shader {
// Tessellation constants stored as consecutive u32/f32 fields.
// Field order matters: TessConstantAttribute names these fields by position, so the
// struct and that enum must be kept in the same order.
// NOTE(review): presumably every field is exactly one dword (u32/f32 are 4 bytes) — confirm
// against common/types.h.
struct TessellationDataConstantBuffer {
u32 ls_stride;
// HullStateConstants::m_cpStride if it is non-zero, otherwise ls_stride.
u32 hs_cp_stride;
// Number of patches submitted in the threadgroup.
u32 num_patches;
// HullStateConstants::m_numInputCP * ls_stride * num_patches when
// "HullStateConstants::m_numInputCP::m_cpStride" is non-zero, otherwise 0.
// Basically 0 when passthrough.
// NOTE(review): "m_numInputCP::m_cpStride" is copied from the original note and is
// presumably meant to be HullStateConstants::m_cpStride — confirm.
u32 hs_output_base;
// 16 * num_patch_attrs
u32 patch_const_size;
// hs_output_base + patch_output_size
u32 patch_const_base;
// output_cp_stride * num_output_cp_per_patch
u32 patch_output_size;
f32 off_chip_tessellation_factor_threshold;
u32 first_edge_tess_factor_index;
};
// Symbolic names for the dword fields of TessellationDataConstantBuffer.
// Each enumerator's value is the position of the matching field in that struct,
// so the two definitions have to stay in the same order.
enum class TessConstantAttribute : u32 {
    LsStride = 0,                           // ls_stride
    HsCpStride = 1,                         // hs_cp_stride
    HsNumPatch = 2,                         // num_patches
    HsOutputBase = 3,                       // hs_output_base
    PatchConstSize = 4,                     // patch_const_size
    PatchConstBase = 5,                     // patch_const_base
    PatchOutputSize = 6,                    // patch_output_size
    OffChipTessellationFactorThreshold = 7, // off_chip_tessellation_factor_threshold
    FirstEdgeTessFactorIndex = 8,           // first_edge_tess_factor_index
};
} // namespace Shader

View File

@ -1,8 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn {
@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) {
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
const IR::U32 value{GetSrc(inst.src[0])};
if (info.stage != Stage::Compute) {
SetDst(inst.dst[0], value);
} else {
if (info.l_stage == LogicalStage::Compute ||
info.l_stage == LogicalStage::TessellationControl) {
SetDst(inst.dst[0], ir.ReadFirstLane(value));
} else {
SetDst(inst.dst[0], value);
}
}

View File

@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) {
const auto& exp = inst.control.exp;
const IR::Attribute attrib{exp.target};
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
return;
}
const std::array vsrc = {
IR::VectorReg(inst.src[0].code),
IR::VectorReg(inst.src[1].code),

View File

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <bit>
#include "common/assert.h"
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_BFM_B32(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_BFE_I32:
return S_BFE(inst, true);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
return S_BFE(inst, false);
case Opcode::S_ABSDIFF_I32:
return S_ABSDIFF_I32(inst);
@ -94,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
break;
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_BCNT1_I32_B64:
return S_BCNT1_I32_B64(inst);
case Opcode::S_BCNT1_I32_B32:
return S_BCNT1_I32_B32(inst);
case Opcode::S_FF1_I32_B32:
return S_FF1_I32_B32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
@ -157,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) {
switch (inst.opcode) {
// SOPK
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
return S_MOVK(inst, false);
case Opcode::S_CMOVK_I32:
return S_MOVK(inst, true);
case Opcode::S_CMPK_EQ_I32:
return S_CMPK(ConditionOp::EQ, true, inst);
case Opcode::S_CMPK_LG_I32:
@ -434,12 +439,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
}
void Translator::S_BFE_U32(const GcnInst& inst) {
void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
@ -454,13 +459,16 @@ void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
// SOPK
void Translator::S_MOVK(const GcnInst& inst) {
const auto simm16 = inst.control.sopk.simm;
if (simm16 & (1 << 15)) {
// TODO: need to verify the case of imm sign extension
UNREACHABLE();
void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) {
const s16 simm16 = inst.control.sopk.simm;
// do the sign extension
const s32 simm32 = static_cast<s32>(simm16);
IR::U32 val = ir.Imm32(simm32);
if (is_conditional) {
// if !SCC its a NOP
val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))};
}
SetDst(inst.dst[0], ir.Imm32(simm16));
SetDst(inst.dst[0], val);
}
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
@ -571,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
}
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
@ -594,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in
return ir.GetVcc();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
case OperandField::ExecLo:
return ir.GetExec();
default:
UNREACHABLE();
}

View File

@ -8,6 +8,8 @@
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
@ -34,9 +36,8 @@ void Translator::EmitPrologue() {
}
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (info.stage) {
case Stage::Vertex:
case Stage::Export:
switch (info.l_stage) {
case LogicalStage::Vertex:
// v0: vertex ID, always present
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
// v1: instance ID, step rate 0
@ -52,7 +53,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
}
break;
case Stage::Fragment:
case LogicalStage::Fragment:
dst_vreg = IR::VectorReg::V0;
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
++dst_vreg; // I
@ -122,7 +123,30 @@ void Translator::EmitPrologue() {
}
}
break;
case Stage::Compute:
case LogicalStage::TessellationControl: {
// Should be laid out like:
// [0:8]: patch id within VGT
// [8:12]: output control point id
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
// TODO PrimitiveId is probably V2 but haven't seen it yet
break;
}
case LogicalStage::TessellationEval:
ir.SetVectorReg(IR::VectorReg::V0,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
// V2 is similar to PrimitiveID but not the same. It seems to only be used in
// compiler-generated address calculations. Its probably the patch id within the
// patches running locally on a given VGT (or CU, whichever is the granularity of LDS
// memory)
// Set to 0. See explanation in comment describing hull/domain passes
ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
// V3 is the actual PrimitiveID as intended by the shader author.
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
case LogicalStage::Compute:
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
@ -137,7 +161,7 @@ void Translator::EmitPrologue() {
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
switch (runtime_info.gs_info.out_primitive[0]) {
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
@ -152,7 +176,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
default:
throw NotImplementedException("Unknown shader stage");
UNREACHABLE_MSG("Unknown shader stage");
}
}
@ -415,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
case OperandField::VccLo:
UNREACHABLE();
ir.SetVccLo(lo);
return ir.SetVccHi(hi);
case OperandField::VccHi:
UNREACHABLE();
case OperandField::M0:
@ -503,7 +528,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
info.stage == Stage::Local);
translator.EmitFetch(inst);
continue;
}

View File

@ -94,12 +94,13 @@ public:
void S_ASHR_I32(const GcnInst& inst);
void S_BFM_B32(const GcnInst& inst);
void S_MUL_I32(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst);
void S_BFE(const GcnInst& inst, bool is_signed);
void S_BFE_I32(const GcnInst& inst);
void S_ABSDIFF_I32(const GcnInst& inst);
void S_NOT_B32(const GcnInst& inst);
// SOPK
void S_MOVK(const GcnInst& inst);
void S_MOVK(const GcnInst& inst, bool is_conditional);
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
void S_ADDK_I32(const GcnInst& inst);
void S_MULK_I32(const GcnInst& inst);
@ -109,7 +110,7 @@ public:
void S_MOV_B64(const GcnInst& inst);
void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst);
void S_BCNT1_I32_B64(const GcnInst& inst);
void S_BCNT1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
@ -200,6 +201,11 @@ public:
void V_BFREV_B32(const GcnInst& inst);
void V_FFBH_U32(const GcnInst& inst);
void V_FFBL_B32(const GcnInst& inst);
void V_FREXP_EXP_I32_F64(const GcnInst& inst);
void V_FREXP_MANT_F64(const GcnInst& inst);
void V_FRACT_F64(const GcnInst& inst);
void V_FREXP_EXP_I32_F32(const GcnInst& inst);
void V_FREXP_MANT_F32(const GcnInst& inst);
void V_MOVRELD_B32(const GcnInst& inst);
void V_MOVRELS_B32(const GcnInst& inst);
void V_MOVRELSD_B32(const GcnInst& inst);
@ -212,7 +218,7 @@ public:
// VOP3a
void V_MAD_F32(const GcnInst& inst);
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
void V_MAD_U32_U24(const GcnInst& inst);
void V_CUBEID_F32(const GcnInst& inst);
void V_CUBESC_F32(const GcnInst& inst);
@ -271,7 +277,7 @@ public:
// Image Memory
// MIMG
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_STORE(bool has_mip, const GcnInst& inst);
void IMAGE_GET_RESINFO(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
void IMAGE_SAMPLE(const GcnInst& inst);

View File

@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_FFBH_U32(inst);
case Opcode::V_FFBL_B32:
return V_FFBL_B32(inst);
case Opcode::V_FREXP_EXP_I32_F64:
return V_FREXP_EXP_I32_F64(inst);
case Opcode::V_FREXP_MANT_F64:
return V_FREXP_MANT_F64(inst);
case Opcode::V_FRACT_F64:
return V_FRACT_F64(inst);
case Opcode::V_FREXP_EXP_I32_F32:
return V_FREXP_EXP_I32_F32(inst);
case Opcode::V_FREXP_MANT_F32:
return V_FREXP_MANT_F32(inst);
case Opcode::V_MOVRELD_B32:
return V_MOVRELD_B32(inst);
case Opcode::V_MOVRELS_B32:
@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
void Translator::V_FRACT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.Fract(src0));
SetDst(inst.dst[0], ir.FPFract(src0));
}
void Translator::V_TRUNC_F32(const GcnInst& inst) {
@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FindILsb(src0));
}
void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}
void Translator::V_FREXP_MANT_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst64(inst.dst[0], ir.FPFrexpSig(src0));
}
// V_FRACT_F64: reads the 64-bit source operand as F64 and stores the result of
// ir.FPFract in the 64-bit destination.
void Translator::V_FRACT_F64(const GcnInst& inst) {
    // The operand comes from GetSrc64<IR::F64>, so it must be held as IR::F64 (as the
    // other *_F64 handlers do). Declaring it IR::F32 mislabels a 64-bit value as a
    // 32-bit one before it reaches FPFract and SetDst64.
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPFract(src0));
}
void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}
void Translator::V_FREXP_MANT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpSig(src0));
}
void Translator::V_MOVRELD_B32(const GcnInst& inst) {
const IR::U32 src_val{GetSrc(inst.src[0])};
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
@ -1025,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
IR::U32 src1{GetSrc(inst.src[1])};
IR::U32 src2{GetSrc(inst.src[2])};
if (!src1.IsImmediate()) {
src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
}
if (!src2.IsImmediate()) {
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
}
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
}

View File

@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Buffer store operations
case Opcode::IMAGE_STORE:
return IMAGE_STORE(inst);
return IMAGE_STORE(false, inst);
case Opcode::IMAGE_STORE_MIP:
return IMAGE_STORE(true, inst);
// Image misc operations
case Opcode::IMAGE_GET_RESINFO:
@ -187,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
buffer_info.globally_coherent.Assign(mtbuf.glc);
buffer_info.system_coherent.Assign(mtbuf.slc);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@ -245,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
}
if (info.stage == Stage::Hull) {
// printf("here\n"); // break
}
IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
@ -267,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
buffer_info.globally_coherent.Assign(mtbuf.glc);
buffer_info.system_coherent.Assign(mtbuf.slc);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@ -423,7 +431,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
}
}
void Translator::IMAGE_STORE(const GcnInst& inst) {
void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg data_reg{inst.dst[0].code};
@ -434,6 +442,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
@ -443,7 +454,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
ir.ImageWrite(handle, body, value, {});
ir.ImageWrite(handle, body, {}, value, info);
}
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
@ -527,6 +538,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
info.has_offset.Assign(flags.test(MimgModifier::Offset));
info.has_lod.Assign(flags.any(MimgModifier::Lod));
info.is_array.Assign(mimg.da);
info.is_unnormalized.Assign(mimg.unrm);
if (gather) {
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);

View File

@ -11,6 +11,7 @@
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/frontend/tessellation.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/ir/reg.h"
@ -163,6 +164,7 @@ struct Info {
UserDataMask ud_mask{};
CopyShaderData gs_copy_data;
u32 uses_patches{};
BufferResourceList buffers;
TextureBufferResourceList texture_buffers;
@ -173,8 +175,12 @@ struct Info {
PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf;
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1;
std::span<const u32> user_data;
Stage stage;
LogicalStage l_stage;
u64 pgm_hash{};
VAddr pgm_base;
@ -190,14 +196,16 @@ struct Info {
bool uses_shared{};
bool uses_fp16{};
bool uses_fp64{};
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_readconst{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
user_data{params.user_data} {}
template <typename T>
@ -244,6 +252,16 @@ struct Info {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
}
}
// Fills `tess_constants` with a byte copy of the data found at the base address of the
// buffer descriptor obtained through ReadUdReg, using the tracked tess constants
// pointer base and dword offset. Asserts that the dword offset has been set.
void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
    // We've already tracked the V# UD
    ASSERT(tess_consts_dword_offset >= 0);
    const u32 ptr_base_index = static_cast<u32>(tess_consts_ptr_base);
    const u32 dword_offset = static_cast<u32>(tess_consts_dword_offset);
    auto sharp = ReadUdReg<AmdGpu::Buffer>(ptr_base_index, dword_offset);
    VAddr src_addr = sharp.base_address;
    const auto* src = reinterpret_cast<TessellationDataConstantBuffer*>(src_addr);
    memcpy(&tess_constants, src, sizeof(tess_constants));
}
};
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {

View File

@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::PrimitiveId:
return "PrimitiveId";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::IsFrontFace:
@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) {
return "LocalInvocationId";
case Attribute::LocalInvocationIndex:
return "LocalInvocationIndex";
case Attribute::InvocationId:
return "InvocationId";
case Attribute::PatchVertices:
return "PatchVertices";
case Attribute::TessellationEvaluationPointU:
return "TessellationEvaluationPointU";
case Attribute::TessellationEvaluationPointV:
return "TessellationEvaluationPointV";
case Attribute::PackedHullInvocationInfo:
return "PackedHullInvocationInfo";
default:
break;
}

View File

@ -72,8 +72,13 @@ enum class Attribute : u64 {
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
PatchVertices = 81,
TessellationEvaluationPointU = 82,
TessellationEvaluationPointV = 83,
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
Max,
};
@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
}
// True when `attribute` lies in the inclusive range
// [TessellationEvaluationPointU, TessellationEvaluationPointV].
constexpr bool IsTessCoord(Attribute attribute) noexcept {
    if (attribute < Attribute::TessellationEvaluationPointU) {
        return false;
    }
    return attribute <= Attribute::TessellationEvaluationPointV;
}
// True when `attribute` lies in the inclusive range [Param0, Param31].
constexpr bool IsParam(Attribute attribute) noexcept {
    if (attribute < Attribute::Param0) {
        return false;
    }
    return attribute <= Attribute::Param31;
}

View File

@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
return fmt::format("{}", arg.VectorReg());
case Type::Attribute:
return fmt::format("{}", arg.Attribute());
case Type::Patch:
return fmt::format("{}", arg.Patch());
default:
return "<unknown immediate type>";
}

View File

@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
// Emits a GetAttribute instruction for `attribute`; `comp` is passed as a 32-bit
// immediate and `index` is forwarded unchanged as the third operand.
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
    const U32 component{Imm32(comp)};
    return Inst<F32>(Opcode::GetAttribute, attribute, component, index);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
}
// Emits a GetTessGenericAttribute instruction whose operands are, in order, the vertex
// index, the attribute index and the component index.
F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
                                       const U32& comp_index) {
    const F32 attribute_value{
        Inst<F32>(Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index)};
    return attribute_value;
}
void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
const U32& comp_index) {
Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
}
// Emits a GetPatch instruction for `patch` and returns its 32-bit float result.
F32 IREmitter::GetPatch(Patch patch) {
    const F32 patch_value{Inst<F32>(Opcode::GetPatch, patch)};
    return patch_value;
}
// Emits a SetPatch instruction that stores `value` to `patch`.
void IREmitter::SetPatch(Patch patch, const F32& value) {
    Inst<Value>(Opcode::SetPatch, patch, value);
}
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) {
case 32:
@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
}
}
// Builds a composite from a span of 2, 3 or 4 values by forwarding to the fixed-arity
// CompositeConstruct overloads. Any other element count is treated as unreachable.
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    switch (elements.size()) {
    case 2:
        return CompositeConstruct(elements[0], elements[1]);
    case 3:
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    case 4:
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    default:
        // Also reached for 0 or 1 elements, so report the actual count instead of
        // claiming there were too many.
        UNREACHABLE_MSG("Composite construct with {} elements (expected 2 to 4)",
                        elements.size());
    }
}
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
const auto read{[&](Opcode opcode, size_t limit) -> Value {
if (element >= limit) {
@ -692,6 +723,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
}
}
// Emits a floating-point division a / b. Both operands must have the same type;
// F32 selects FPDiv32 and F64 selects FPDiv64. Any other type is rejected through
// ThrowInvalidType.
F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
    const Type lhs_type{a.Type()};
    if (lhs_type != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
    }
    if (lhs_type == Type::F32) {
        return Inst<F32>(Opcode::FPDiv32, a, b);
    }
    if (lhs_type == Type::F64) {
        return Inst<F64>(Opcode::FPDiv64, a, b);
    }
    ThrowInvalidType(lhs_type);
}
F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
if (a.Type() != b.Type() || a.Type() != c.Type()) {
UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
@ -855,8 +900,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
}
}
F32 IREmitter::Fract(const F32& value) {
return Inst<F32>(Opcode::FPFract, value);
// Emits FPFract32 for an F32 operand or FPFract64 for an F64 operand. Any other type
// is rejected through ThrowInvalidType.
F32F64 IREmitter::FPFract(const F32F64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::F32) {
        return Inst<F32>(Opcode::FPFract32, value);
    }
    if (value_type == Type::F64) {
        return Inst<F64>(Opcode::FPFract64, value);
    }
    ThrowInvalidType(value_type);
}
// Emits FPFrexpSig32 for an F32 operand or FPFrexpSig64 for an F64 operand; the result
// has the same width as the operand. Any other type is rejected through ThrowInvalidType.
F32F64 IREmitter::FPFrexpSig(const F32F64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::F32) {
        return Inst<F32>(Opcode::FPFrexpSig32, value);
    }
    if (value_type == Type::F64) {
        return Inst<F64>(Opcode::FPFrexpSig64, value);
    }
    ThrowInvalidType(value_type);
}
// Emits FPFrexpExp32 for an F32 operand or FPFrexpExp64 for an F64 operand; the result
// is a 32-bit integer value in both cases. Any other type is rejected through
// ThrowInvalidType.
U32 IREmitter::FPFrexpExp(const F32F64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::F32) {
        return Inst<U32>(Opcode::FPFrexpExp32, value);
    }
    if (value_type == Type::F64) {
        return Inst<U32>(Opcode::FPFrexpExp64, value);
    }
    ThrowInvalidType(value_type);
}
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
@ -1556,9 +1630,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
}
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
const U32& lod, const U32& multisampling, TextureInstInfo info) {
return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling);
// Emits an ImageFetch instruction carrying `info` as its flags. Operand order is
// handle, coords, lod, offset, multisampling.
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod,
                            const Value& offset, const U32& multisampling, TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageFetch, inst_flags, handle, coords, lod, offset, multisampling);
}
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
@ -1582,13 +1656,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords,
offset, lod_clamp);
}
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
return Inst(Opcode::ImageRead, Flags{info}, handle, coords);
// Emits an ImageRead instruction carrying `info` as its flags. Operand order is
// handle, coords, lod.
Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod,
                           TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageRead, inst_flags, handle, coords, lod);
}
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
TextureInstInfo info) {
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod,
const Value& color, TextureInstInfo info) {
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color);
}
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction

View File

@ -10,6 +10,7 @@
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
@ -80,10 +81,18 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
IR::Value index = IR::Value(u32(0u)));
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
const U32& comp_index);
void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);
[[nodiscard]] F32 GetPatch(Patch patch);
void SetPatch(Patch patch, const F32& value);
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
@ -138,6 +147,8 @@ public:
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const Value& e4);
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
@ -158,6 +169,7 @@ public:
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] F32F64 FPAbs(const F32F64& value);
@ -179,7 +191,9 @@ public:
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
[[nodiscard]] F32 Fract(const F32& value);
[[nodiscard]] F32F64 FPFract(const F32F64& value);
[[nodiscard]] F32F64 FPFrexpSig(const F32F64& value);
[[nodiscard]] U32 FPFrexpExp(const F32F64& value);
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
@ -311,14 +325,16 @@ public:
TextureInstInfo info);
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
const Value& offset, const F32& dref, TextureInstInfo info);
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
const U32& lod, const U32& multisampling, TextureInstInfo info);
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod,
const Value& offset, const U32& multisampling,
TextureInstInfo info);
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
const Value& derivatives_dx, const Value& derivatives_dy,
const Value& offset, const F32& lod_clamp,
TextureInstInfo info);
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod,
TextureInstInfo info);
void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color,
TextureInstInfo info);
void EmitVertex();
@ -330,6 +346,7 @@ private:
// Creates an instruction with opcode `op` and the given operands (each wrapped in a
// Value) through block->PrependNewInst at the current insertion point, records `block`
// as the new instruction's parent, and returns the instruction wrapped as `T`.
template <typename T = Value, typename... Args>
T Inst(Opcode op, Args... args) {
    auto new_inst_it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
    auto* const new_inst = &*new_inst_it;
    new_inst->SetParent(block);
    return T{Value{new_inst}};
}
@ -347,6 +364,7 @@ private:
u32 raw_flags{};
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
it->SetParent(block);
return T{Value{&*it}};
}
};

View File

@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::Discard:
case Opcode::DiscardCond:
case Opcode::SetAttribute:
case Opcode::SetTcsGenericAttribute:
case Opcode::SetPatch:
case Opcode::StoreBufferU32:
case Opcode::StoreBufferU32x2:
case Opcode::StoreBufferU32x3:

View File

@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
constexpr Type ScalarReg{Type::ScalarReg};
constexpr Type VectorReg{Type::VectorReg};
constexpr Type Attribute{Type::Attribute};
constexpr Type SystemValue{Type::SystemValue};
constexpr Type Patch{Type::Patch};
constexpr Type U1{Type::U1};
constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};

View File

@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32,
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetPatch, F32, Patch, )
OPCODE(SetPatch, Void, Patch, F32, )
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )
// Flags
OPCODE(GetScc, U1, Void, )
@ -184,6 +188,8 @@ OPCODE(FPMin32, F32, F32,
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPDiv32, F32, F32, F32, )
OPCODE(FPDiv64, F64, F64, F64, )
OPCODE(FPNeg32, F32, F32, )
OPCODE(FPNeg64, F64, F64, )
OPCODE(FPRecip32, F32, F32, )
@ -208,7 +214,12 @@ OPCODE(FPCeil32, F32, F32,
OPCODE(FPCeil64, F64, F64, )
OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, F64, F64, )
OPCODE(FPFract, F32, F32, )
OPCODE(FPFract32, F32, F32, )
OPCODE(FPFract64, F64, F64, )
OPCODE(FPFrexpSig32, F32, F32, )
OPCODE(FPFrexpSig64, F64, F64, )
OPCODE(FPFrexpExp32, U32, F32, )
OPCODE(FPFrexpExp64, U32, F64, )
OPCODE(FPOrdEqual32, U1, F32, F32, )
OPCODE(FPOrdEqual64, U1, F64, F64, )
@ -327,12 +338,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq
OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, )
// Image atomic operations
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )

View File

@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
}
}
template <typename T>
void FoldMul(IR::Block& block, IR::Inst& inst) {
if (!FoldCommutative<T>(inst, [](T a, T b) { return a * b; })) {
return;
}
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
return;
}
}
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
return;
case IR::Opcode::IMul32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
FoldMul<u32>(block, inst);
return;
case IR::Opcode::UDiv32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0");
return a / b;
});
return;
case IR::Opcode::UMod32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0");
return a % b;
});
return;
case IR::Opcode::FPCmpClass32:
FoldCmpClass(block, inst);

View File

@ -0,0 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once

View File

@ -0,0 +1,746 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/pattern_matching.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Optimization {
/**
* Tessellation shaders pass outputs to the next shader using LDS.
* The Hull shader stage receives input control points stored in LDS.
*
* These passes attempt to resolve LDS accesses to attribute accesses and correctly
* write to the tessellation factor tables.
*
* The LDS layout is:
* - TCS inputs for patch 0
* - TCS inputs for patch 1
* - TCS inputs for patch 2
* - ...
* - TCS outputs for patch 0
* - TCS outputs for patch 1
* - TCS outputs for patch 2
* - ...
* - PatchConst TCS outputs for patch 0
* - PatchConst TCS outputs for patch 1
* - PatchConst TCS outputs for patch 2
*
*
* If the Hull stage does not write any new control points the driver will
* optimize LDS layout so input and output control point spaces overlap.
* (Passthrough)
*
* The gnm driver requires a V# holding special constants to be bound
* for reads by the shader.
* The Hull and Domain shaders read values from this buffer which
* contain size and offset information required to address input, output,
* or PatchConst attributes within the current patch.
* See the TessellationDataConstantBuffer struct to see the layout of this V#.
*
* Tessellation factors are stored to a special tessellation factor V# that is automatically bound
* by the driver. This is the input to the fixed function tessellator that actually subdivides the
* domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull
* shader.
* The offset into the tess factor buffer determines which factor the shader is writing.
* Additionally, most hull shaders seem to redundantly write tess factors to PatchConst
* attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes.
*
* LDS reads in the Hull shader can be from input control points, and in the Domain shader can
* be hs output control points (output from the perspective of the Hull shader) and patchconst
* values.
* LDS stores in the Hull shader can either be output control point writes or per-patch
* (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless it's
* followed by the geometry stage (but we haven't seen this yet), so nothing special there.
* The address calculations can vary significantly and can't be easily pattern matched. We are at
* the mercy of instruction selection the ps4 compiler wanted to use.
* Generally though, they could look something like this:
* Input control point:
* addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride
* + attr# * 16 + component
* Output control point:
* addr = #patches * input_cp_stride * #input_cp_per_patch
* + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride
* + attr# * 16 + component
* Per patch output:
* addr = #patches * input_cp_stride * #cp_per_input_patch
* + #patches * output_patch_stride
* + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component
*
* output_patch_stride and output_cp_stride are usually compile-time constants in the GCN code.
*
* Hull shaders can probably also read output control points corresponding to other threads, like
* shared memory (but we haven't seen this yet).
* ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
* They should also be able to read PatchConst values,
* although not sure if this happens in practice.
*
* To determine which type of attribute (input, output, patchconst) a given load/store to LDS
* is touching, we check the users of TessConstants V# reads.
*
* In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control
* point id (InvocationId) are packed in VGPR1 by the driver like
* V1 = InvocationId << 8 | PatchIdInVgt
* The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine
* which is which.
*
* This pass does not attempt to deduce the exact attribute referenced in a LDS load/store.
* Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's,
* TES in's, and PatchConst in/out's.
*
* TCS/TES Input attributes:
* We define input attributes using an array in the shader roughly like this:
* // equivalent GLSL in TCS
* layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
*
* Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
* We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
* For TES, the number of attributes comes from hs_cp_stride / 16.
* The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
* patch (for Hull) or the hs output patch (for Domain).
*
* For input reads in TCS or TES, we emit SPIR-V like:
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
*
* For output writes, we assume the control point index is InvocationId, since high level languages
* impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
* like this:
* layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES];
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
*
* NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs
* variable.
*
* Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole
* which contains the attributes from many patches. On the other hand, higher level shading
* languages restrict attribute access to the patch of the current thread, which is naturally a
* restriction in SPIR-V also.
* The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many
* patches' attributes are just implementation details of the ps4 driver/compiler. To deal with
* this, we can replace certain TessConstant V# reads with 0, which only contribute to the base
* address of the current patch's attributes in LDS and not the indexes within the local patch.
*
* (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing
* attributes to buffers and not caring about whether they are hs input, hs output, or patchconst
* attributes)
*
*/
namespace {
using namespace Shader::Optimiation::PatternMatching;
// Records where the tess constants V# lives (`sharp_ptr_base`, `sharp_dword_offset`) in
// `info`, reads the constants into `tess_constants` through
// Info::ReadTessConstantBuffer, and then initialises the stage-specific runtime info from
// them: hs_info when the logical stage is TessellationControl, vs_info otherwise.
static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
                              Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                              TessellationDataConstantBuffer& tess_constants) {
    // The location must be stored before the read, which looks it up from `info`.
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);

    const bool is_hull = info.l_stage == LogicalStage::TessellationControl;
    if (is_hull) {
        runtime_info.hs_info.InitFromTessConstants(tess_constants);
        return;
    }
    runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
// Location of the tess constants V#, as produced by FindTessConstantSharp.
struct TessSharpLocation {
// User-data scalar register feeding the ReadConst the V# is loaded through, or
// IR::ScalarReg::Max when the handle is built directly from GetUserData.
IR::ScalarReg ptr_base;
// Immediate dword offset of that ReadConst; when ptr_base is IR::ScalarReg::Max this is
// instead the numeric value of the user-data scalar register holding the first dword.
u32 dword_off;
};
// Pattern-matches the handle operand (Arg(0)) of `read_const_buffer` to work out where
// the V# it reads from is stored. Two shapes are recognised:
//  - the first dword of the handle comes straight from GetUserData with an immediate
//    register: ptr_base is IR::ScalarReg::Max and dword_off is that register's index;
//  - the first dword comes from a ReadConst whose base is a two-dword composite starting
//    with GetUserData(immediate register) and whose offset is an immediate: ptr_base is
//    that register and dword_off is the immediate offset.
// Returns std::nullopt when the handle matches neither shape.
std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
    IR::Value sharp_ptr_base;
    IR::Value sharp_dword_offset;
    IR::Value handle = read_const_buffer->Arg(0);

    if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(),
                                  MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        // The captured immediate is the user-data scalar register itself.
        return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
                                 .dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
    }
    if (M_COMPOSITECONSTRUCTU32X4(
            M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)),
                                                  MatchIgnore()),
                        MatchImm(sharp_dword_offset)),
            MatchIgnore(), MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
                                 .dword_off = sharp_dword_offset.U32()};
    }
    return std::nullopt;
}
// Walker that helps deduce what type of attribute a DS instruction is reading
// or writing, which could be an input control point, output control point,
// or per-patch constant (PatchConst).
// For certain ReadConstBuffer instructions using the tess constants V#, we visit the users
// recursively and increment a counter on the Load/WriteShared users.
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
// and PatchConstBase (m_patchConstBase).
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
// is used as an addend to skip the region for input control points, and similarly
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
// for output control points.
//
// TODO: this will break if AMD compiler used distributive property like
// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch)
// Walks the transitive users of a tess constant read and tags every shared-memory
// load/store that is reached by adding a weight to the u32 stored in that instruction's
// flags. Phi instructions get their own bookkeeping (stored in the phi's flags) so that
// cycles terminate.
class TessConstantUseWalker {
public:
// Starts one walk from `read_const_buffer`. The weight added to each reached
// Load/WriteShared is 1 for HsNumPatch and HsOutputBase and 2 for PatchConstBase; any
// other attribute hits UNREACHABLE.
void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
u32 inc;
switch (attr) {
case TessConstantAttribute::HsNumPatch:
case TessConstantAttribute::HsOutputBase:
inc = 1;
break;
case TessConstantAttribute::PatchConstBase:
inc = 2;
break;
default:
UNREACHABLE();
}
for (IR::Use use : read_const_buffer->Uses()) {
MarkTessAttributeUsersHelper(use, inc);
}
// Each call is a separate walk; advance the sequence number so phis visited on this
// walk can be told apart from phis visited on the next one.
++seq_num;
}
private:
// Handles the instruction that consumes `use`, then recurses into that instruction's
// own users unless the walk stops at it.
void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) {
IR::Inst* inst = use.user;
switch (use.user->GetOpcode()) {
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::LoadSharedU128:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU128: {
// Accumulate the weight in the instruction's flags word.
u32 counter = inst->Flags<u32>();
inst->SetFlags<u32>(counter + inc);
// Stop here
return;
}
case IR::Opcode::Phi: {
// Per-phi state kept in the phi's flags.
// NOTE(review): seq_num here is u16 while the walker's seq_num member is u32, so the
// assignment below truncates once more than 65535 walks have been made - confirm this
// cannot happen in practice.
struct PhiCounter {
u16 seq_num;
u8 unique_edge;
u8 counter;
};
PhiCounter count = inst->Flags<PhiCounter>();
// A phi that already carries a count must be entered through the same operand again.
ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
// The point of seq_num is to tell us if we've already traversed this
// phi on the current walk. Alternatively we could keep a set of phis
// seen on the current walk. This is to handle phi cycles.
if (count.seq_num == 0) {
// First time we've encountered this phi
count.seq_num = seq_num;
// Mark the phi as having been traversed originally through this edge
count.unique_edge = use.operand;
count.counter = inc;
} else if (count.seq_num < seq_num) {
count.seq_num = seq_num;
// For now, assume we are visiting this phi via the same edge
// as on other walks. If not, some dataflow analysis might be necessary
ASSERT(count.unique_edge == use.operand);
count.counter += inc;
} else {
// count.seq_num == seq_num
// there's a cycle, and we've already been here on this walk
return;
}
inst->SetFlags<PhiCounter>(count);
break;
}
default:
break;
}
// Not a shared-memory access (or a phi seen for the first time on this walk): keep
// following the value through this instruction's users.
for (IR::Use use : inst->Uses()) {
MarkTessAttributeUsersHelper(use, inc);
}
}
// Identifier of the current walk; starts at 1 so that 0 in a phi's flags means
// "never visited".
u32 seq_num{1u};
};
enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };

// Classifies a Load/WriteShared instruction by the counter stored in its flags.
// A counter of 0 means the input control point region. In a passthrough tessellation control
// shader any non-zero counter means the patch constant region; otherwise the counter value is
// used directly as the AttributeRegion enumerator.
static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info) {
    const u32 marker = ring_access->Flags<u32>();
    if (marker == 0) {
        return AttributeRegion::InputCP;
    }
    const bool passthrough_tcs = info.l_stage == LogicalStage::TessellationControl &&
                                 runtime_info.hs_info.IsPassthrough();
    if (passthrough_tcs) {
        ASSERT(marker <= 1);
        return AttributeRegion::PatchConst;
    }
    ASSERT(marker <= 2);
    return static_cast<AttributeRegion>(marker);
}
// Conservative syntactic check that `term` is a multiple of `stride`.
// Recognises the immediate `stride` itself, a bitfield extract of the low 24 bits of such a
// term, and a 32-bit multiply where either factor qualifies. Anything else yields false.
static bool IsDivisibleByStride(IR::Value term, u32 stride) {
    if (MatchU32(stride).Match(term)) {
        return true;
    }
    IR::Value lhs, rhs;
    const bool is_low24_extract =
        M_BITFIELDUEXTRACT(MatchValue(lhs), MatchU32(0), MatchU32(24)).Match(term) ||
        M_BITFIELDSEXTRACT(MatchValue(lhs), MatchU32(0), MatchU32(24)).Match(term);
    if (is_low24_extract) {
        return IsDivisibleByStride(lhs, stride);
    }
    if (M_IMUL32(MatchValue(lhs), MatchValue(rhs)).Match(term)) {
        return IsDivisibleByStride(lhs, stride) || IsDivisibleByStride(rhs, stride);
    }
    return false;
}
// Flattens a tree of IAdd32 instructions rooted at `addend`. Leaves that are not provably
// divisible by `stride` are appended to `addends` (left operand first).
// Returns true if at least one leaf was dropped because it is divisible by `stride`.
static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<IR::U32>& addends) {
    IR::Value lhs, rhs;
    if (M_IADD32(MatchValue(lhs), MatchValue(rhs)).Match(addend)) {
        // Both sides must always be visited so that every surviving leaf is collected
        const bool dropped_in_lhs = TryOptimizeAddendInModulo(lhs, stride, addends);
        const bool dropped_in_rhs = TryOptimizeAddendInModulo(rhs, stride, addends);
        return dropped_in_lhs || dropped_in_rhs;
    }
    if (IsDivisibleByStride(addend, stride)) {
        return true;
    }
    addends.push_back(IR::U32{addend});
    return false;
}
// For a calculation of the form (a + b + ...) % stride we can rely on
//   (a + b) mod N = (a mod N + b mod N) mod N
// so any addend that is divisible by stride may be replaced with 0 in the attribute
// or component index calculation.
// Returns `addr` untouched when no addend could be dropped; otherwise returns a new sum,
// starting from an immediate 0, of the addends that remain.
static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) {
    std::vector<IR::U32> remaining;
    if (!TryOptimizeAddendInModulo(addr, stride, remaining)) {
        return addr;
    }
    IR::U32 sum = ir.Imm32(0);
    for (auto& term : remaining) {
        sum = ir.IAdd(sum, term);
    }
    return sum;
}
// TODO: can optimize div in control point index similarly to mod

// Read a TCS input (InputCP region) or TES input (OutputCP region).
//
// addr   - byte address of the access; the attribute index is (addr % stride) >> 4 and the
//          component index is (addr & 0xF) >> 2, i.e. 16 bytes per attribute and 4 bytes per
//          component
// stride - size in bytes of one control point
// off_dw - index, in dwords, of the component to read relative to `addr` (used by multi-dword
//          loads)
// Returns the emitted GetTessGenericAttribute value.
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
                                      u32 off_dw) {
    if (off_dw > 0) {
        // off_dw counts dwords while addr counts bytes. Without the scaling every dword of a
        // 64/128-bit load would decode to the same component.
        addr = ir.IAdd(addr, ir.Imm32(off_dw * 4u));
    }
    const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
    const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
    const IR::U32 attr_index =
        ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
    const IR::U32 comp_index =
        ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
    return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
}
} // namespace
// Rewrites the memory accesses of a hull (tessellation control) shader into attribute and
// patch operations:
//  - globally coherent StoreBufferU32{,x2,x3,x4} become SetPatch on tessellation factors,
//  - WriteShared* become SetTcsGenericAttribute (OutputCP region) or SetPatch (PatchConst),
//  - LoadShared* become GetTessGenericAttribute reads of the InputCP region.
// For passthrough shaders, code copying every input attribute to the matching output
// attribute is additionally inserted after the prologue.
// The region of each shared access is taken from the counter in the instruction flags
// (see GetAttributeRegionKind).
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    const Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::StoreBufferU32:
            case IR::Opcode::StoreBufferU32x2:
            case IR::Opcode::StoreBufferU32x3:
            case IR::Opcode::StoreBufferU32x4: {
                const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
                if (!buffer_info.globally_coherent) {
                    break;
                }
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                // Reuse the float operand when the stored dword is just a bitcast float
                const auto GetValue = [&](IR::Value value) -> IR::F32 {
                    if (auto* producer = value.TryInstRecursive();
                        producer && producer->GetOpcode() == IR::Opcode::BitCastU32F32) {
                        return IR::F32{producer->Arg(0)};
                    }
                    return ir.BitCast<IR::F32, IR::U32>(IR::U32{value});
                };
                const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
                IR::U32 index = IR::U32{inst.Arg(1)};
                ASSERT(index.IsImmediate());
                const u32 gcn_factor_idx = (buffer_info.inst_offset.Value() + index.U32()) >> 2;

                const IR::Value data = inst.Arg(2);
                auto get_factor_attr = [&](u32 factor_idx) -> IR::Patch {
                    // The hull outputs tess factors in different formats depending on the shader.
                    // For triangle domains, it seems to pack the entries into 4 consecutive floats,
                    // with the 3 edge factors followed by the 1 interior factor.
                    // For quads, it does 4 edge factors then 2 interior.
                    // There is a tess factor stride member of the GNMX hull constants struct in
                    // a hull program shader binary archive, but this doesn't seem to be
                    // communicated to the driver.
                    // The layout seems to be implied by the type of the abstract domain.
                    switch (runtime_info.hs_info.tess_type) {
                    case AmdGpu::TessellationType::Isoline:
                        ASSERT(factor_idx < 2);
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Triangle:
                        ASSERT(factor_idx < 4);
                        if (factor_idx == 3) {
                            return IR::Patch::TessellationLodInteriorU;
                        }
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Quad:
                        ASSERT(factor_idx < 6);
                        return IR::PatchFactor(factor_idx);
                    default:
                        UNREACHABLE();
                    }
                };

                inst.Invalidate();
                if (num_dwords == 1) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
                    break;
                }
                auto* composite = data.TryInstRecursive();
                ASSERT(composite &&
                       (composite->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
                for (u32 i = 0; i < num_dwords; i++) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(composite->Arg(i)));
                }
                break;
            }

            case IR::Opcode::WriteSharedU32:
            case IR::Opcode::WriteSharedU64:
            case IR::Opcode::WriteSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
                const IR::U32 addr{inst.Arg(0)};
                const IR::U32 data{inst.Arg(1).Resolve()};

                // Emits the write of one dword. `off_dw` is the dword index within the access.
                const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
                                           u32 off_dw) {
                    const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);

                    if (output_kind == AttributeRegion::OutputCP) {
                        if (off_dw > 0) {
                            // addr is a byte address (16 bytes per attribute, 4 per component),
                            // so the dword index has to be scaled before it is added.
                            addr = ir.IAdd(addr, ir.Imm32(off_dw * 4u));
                        }
                        u32 stride = runtime_info.hs_info.hs_output_cp_stride;
                        // Invocation ID array index is implicit, handled by SPIRV backend
                        const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
                        const IR::U32 attr_index = ir.ShiftRightLogical(
                            ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
                        const IR::U32 comp_index = ir.ShiftRightLogical(
                            ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
                        ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
                    } else {
                        ASSERT(output_kind == AttributeRegion::PatchConst);
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
                    }
                };

                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                if (num_dwords == 1) {
                    SetOutput(addr, data, region, 0);
                } else {
                    for (u32 i = 0; i < num_dwords; i++) {
                        SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
                    }
                }
                inst.Invalidate();
                break;
            }

            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                ASSERT_MSG(region == AttributeRegion::InputCP,
                           "Unhandled read of output or patchconst attribute in hull shader");
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(
                        ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component =
                            ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }

            default:
                break;
            }
        }
    }

    if (runtime_info.hs_info.IsPassthrough()) {
        // Copy input attributes to output attributes, indexed by InvocationID
        // Passthrough should imply that input and output patches have same number of vertices
        IR::Block* entry_block = *program.blocks.begin();
        auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
            return inst.GetOpcode() == IR::Opcode::Prologue;
        });
        ASSERT(it != entry_block->end());
        ++it;
        ASSERT(it != entry_block->end());
        ++it;
        // Prologue
        // SetExec #true
        // <- insert here
        // ...
        IR::IREmitter ir{*entry_block, it};

        ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
        u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
        const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
        for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) {
            for (u32 comp = 0; comp < 4; comp++) {
                IR::F32 attr_read =
                    ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp));
                // InvocationId is implicit index for output control point writes
                ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp));
            }
        }
        // We could wrap the rest of the program in an if stmt
        // CopyInputAttrsToOutputs(); // pseudocode
        // if (InvocationId == 0) {
        //     PatchConstFunction();
        // }
        // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally
    }
}
// Rewrites the LoadShared* instructions of a domain (tessellation evaluation) shader.
// Accesses classified as OutputCP become GetTessGenericAttribute reads; accesses classified
// as PatchConst become GetPatch reads of a generic patch component. Multi-dword loads are
// rebuilt as a composite of the individual dword reads.
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    const Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                // Only shared loads are rewritten, so the emitter is created only here
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                // Emits the read of one dword. `off_dw` is the dword index within the access.
                const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessInputComponent(
                            addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
                    } else {
                        ASSERT(region == AttributeRegion::PatchConst);
                        // The patch component is selected statically, so the address must be
                        // known here. The hull shader transform performs the same check.
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
                    }
                };
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component = GetInput(addr, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }
            default:
                break;
            }
        }
    }
}
// Run before either hull or domain transform.
//
// 1. Finds the TessellationDataConstantBuffer V# by searching the address operands of the
//    Load/WriteShared instructions for a ReadConstBuffer, and initializes the tess constants
//    from it.
// 2. For every ReadConstBuffer of that V#: replaces stride reads with the known constants,
//    and for NumPatch / HsOutputBase / PatchConstBase marks the reachable shared accesses
//    (TessConstantUseWalker) before replacing the read with 0.
// 3. In tessellation control shaders, replaces the bitfield extracts of the packed hull
//    invocation info with 0 (patch id) and either 0 or InvocationId (output control point id).
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
    TessellationDataConstantBuffer tess_constants;
    Shader::Info& info = program.info;
    // Find the TessellationDataConstantBuffer V#
    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            auto found_tess_consts_sharp = [&]() -> bool {
                switch (inst.GetOpcode()) {
                case IR::Opcode::LoadSharedU32:
                case IR::Opcode::LoadSharedU64:
                case IR::Opcode::LoadSharedU128:
                case IR::Opcode::WriteSharedU32:
                case IR::Opcode::WriteSharedU64:
                case IR::Opcode::WriteSharedU128: {
                    IR::Value addr = inst.Arg(0);
                    auto read_const_buffer = IR::BreadthFirstSearch(
                        addr, [](IR::Inst* maybe_tess_const) -> std::optional<IR::Inst*> {
                            if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) {
                                return maybe_tess_const;
                            }
                            return std::nullopt;
                        });
                    if (read_const_buffer) {
                        auto sharp_location = FindTessConstantSharp(read_const_buffer.value());
                        if (sharp_location) {
                            if (info.tess_consts_dword_offset >= 0) {
                                // It's possible there's a ReadConstBuffer that contributes to an
                                // LDS address and isn't a TessConstant V# read. Could improve on
                                // this somehow
                                ASSERT_MSG(static_cast<s32>(sharp_location->dword_off) ==
                                                   info.tess_consts_dword_offset &&
                                               sharp_location->ptr_base ==
                                                   info.tess_consts_ptr_base,
                                           "TessConstants V# is ambiguous");
                            }
                            InitTessConstants(sharp_location->ptr_base,
                                              static_cast<s32>(sharp_location->dword_off), info,
                                              runtime_info, tess_constants);
                            return true;
                        }
                        UNREACHABLE_MSG("Failed to match tess constant sharp");
                    }
                    return false;
                }
                default:
                    return false;
                }
            }();

            if (found_tess_consts_sharp) {
                // Only leaves the current block; the remaining blocks are still scanned
                break;
            }
        }
    }

    ASSERT(info.tess_consts_dword_offset >= 0);

    TessConstantUseWalker walker;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
                auto sharp_location = FindTessConstantSharp(&inst);
                // Same signed comparison as in the search above
                if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
                    static_cast<s32>(sharp_location->dword_off) ==
                        info.tess_consts_dword_offset) {
                    // The shader is reading from the TessConstants V#
                    IR::Value index = inst.Arg(1);

                    ASSERT_MSG(index.IsImmediate(),
                               "Tessellation constant read with dynamic index");
                    u32 off_dw = index.U32();
                    ASSERT(off_dw <=
                           static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));

                    auto tess_const_attr = static_cast<TessConstantAttribute>(off_dw);
                    switch (tess_const_attr) {
                    case TessConstantAttribute::LsStride:
                        // If not, we may need to make this runtime state for TES
                        ASSERT(info.l_stage == LogicalStage::TessellationControl);
                        inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride));
                        break;
                    case TessConstantAttribute::HsCpStride:
                        inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride));
                        break;
                    case TessConstantAttribute::HsNumPatch:
                    case TessConstantAttribute::HsOutputBase:
                    case TessConstantAttribute::PatchConstBase:
                        walker.MarkTessAttributeUsers(&inst, tess_const_attr);
                        // We should be able to safely set these to 0 so that indexing happens only
                        // within the local patch in the recompiled Vulkan shader. This assumes
                        // these values only contribute to address calculations for in/out
                        // attributes in the original gcn shader.
                        // See the explanation for why we set V2 to 0 when emitting the prologue.
                        inst.ReplaceUsesWithAndRemove(IR::Value(0u));
                        break;
                    case Shader::TessConstantAttribute::PatchConstSize:
                    case Shader::TessConstantAttribute::PatchOutputSize:
                    case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold:
                    case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex:
                        // May need to replace PatchConstSize and PatchOutputSize with 0
                        break;
                    default:
                        UNREACHABLE_MSG("Read past end of TessConstantsBuffer");
                    }
                }
            }
        }
    }

    // This pattern matching is necessary for now unless we support dynamic indexing of
    // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single
    // vec4 array like in/out attrs. Not sure about tess factors.
    if (info.l_stage == LogicalStage::TessellationControl) {
        // Replace the BFEs on V1 (packed with patch id within VGT and output cp id)
        for (IR::Block* block : program.blocks) {
            for (auto it = block->Instructions().begin(); it != block->Instructions().end(); ++it) {
                IR::Inst& inst = *it;
                if (M_BITFIELDUEXTRACT(
                        M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                                          MatchIgnore()),
                        MatchU32(0), MatchU32(8))
                        .Match(IR::Value{&inst})) {
                    // This is the patch id within the VGT, not the actual PrimitiveId
                    // in the draw
                    IR::Value replacement(0u);
                    inst.ReplaceUsesWithAndRemove(replacement);
                } else if (M_BITFIELDUEXTRACT(
                               M_GETATTRIBUTEU32(
                                   MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                                   MatchIgnore()),
                               MatchU32(8), MatchU32(5))
                               .Match(IR::Value{&inst})) {
                    IR::IREmitter ir(*block, it);
                    IR::Value replacement;
                    if (runtime_info.hs_info.IsPassthrough()) {
                        // Deal with annoying pattern in BB where InvocationID use makes no
                        // sense (in addr calculation for patchconst or tess factor write)
                        replacement = ir.Imm32(0);
                    } else {
                        replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
                    }
                    inst.ReplaceUsesWithAndRemove(replacement);
                }
            }
        }
    }
}
} // namespace Shader::Optimization

View File

@ -6,6 +6,10 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
namespace Shader {
struct Profile;
}
namespace Shader::Optimization {
void SsaRewritePass(IR::BlockList& program);
@ -18,5 +22,9 @@ void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile);
} // namespace Shader::Optimization

View File

@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) {
}
}
// Builds a 4-component vector from `texel` according to the component swizzle of `sharp`:
// for each destination component i, `sharp.GetSwizzle(i)` selects a constant 0.0 / 1.0 or one
// of the four components of `texel`.
IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
    const auto select_component = [&](AmdGpu::CompSwizzle swizzle) -> IR::Value {
        switch (swizzle) {
        case AmdGpu::CompSwizzle::Zero:
            return ir.Imm32(0.f);
        case AmdGpu::CompSwizzle::One:
            return ir.Imm32(1.f);
        case AmdGpu::CompSwizzle::Red:
            return ir.CompositeExtract(texel, 0);
        case AmdGpu::CompSwizzle::Green:
            return ir.CompositeExtract(texel, 1);
        case AmdGpu::CompSwizzle::Blue:
            return ir.CompositeExtract(texel, 2);
        case AmdGpu::CompSwizzle::Alpha:
            return ir.CompositeExtract(texel, 3);
        default:
            UNREACHABLE();
        }
    };

    boost::container::static_vector<IR::Value, 4> swizzled;
    for (u32 dst = 0; dst < 4; ++dst) {
        swizzled.emplace_back(select_component(sharp.GetSwizzle(dst)));
    }
    return ir.CompositeConstruct(swizzled[0], swizzled[1], swizzled[2], swizzled[3]);
}
class Descriptors {
public:
explicit Descriptors(Info& info_)
@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
// Apply dst_sel swizzle on formatted buffer instructions
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
} else {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
}
}
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors, const IR::Inst* producer,
const u32 image_binding, const AmdGpu::Image& image) {
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
const u32 sampler_binding = [&] {
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
return descriptors.Add(SamplerResource{
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
return descriptors.Add(SamplerResource{
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -539,28 +580,47 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}
}();
const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
// Query dimensions of image if needed for normalization.
// We can't use the image sharp because it could be bound to a different image later.
const auto dimensions =
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
: IR::Value{};
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
const auto coord = get_addr_reg(coord_idx);
if (unnormalized) {
// Normalize the coordinate for sampling, dividing by its corresponding dimension.
const auto dim =
ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)});
return ir.FPDiv(coord, dim);
}
return coord;
};
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
const IR::Value coords = [&] -> IR::Value {
switch (image.GetType()) {
case AmdGpu::ImageType::Color1D: // x
addr_reg = addr_reg + 1;
return get_addr_reg(addr_reg - 1);
return get_coord(addr_reg - 1, 0);
case AmdGpu::ImageType::Color1DArray: // x, slice
[[fallthrough]];
case AmdGpu::ImageType::Color2D: // x, y
addr_reg = addr_reg + 2;
return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
case AmdGpu::ImageType::Color2DArray: // x, y, slice
[[fallthrough]];
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
[[fallthrough]];
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1));
case AmdGpu::ImageType::Color3D: // x, y, z
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
get_addr_reg(addr_reg - 1));
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_coord(addr_reg - 1, 2));
case AmdGpu::ImageType::Cube: // x, y, face
addr_reg = addr_reg + 3;
return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
default:
UNREACHABLE();
@ -711,11 +771,17 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
}();
inst.SetArg(1, coords);
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
inst.SetArg(3, SwizzleVector(ir, image, inst.Arg(3)));
}
if (inst_info.has_lod) {
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
inst.GetOpcode() == IR::Opcode::ImageRead ||
inst.GetOpcode() == IR::Opcode::ImageWrite);
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
inst.SetArg(3, arg);
inst.SetArg(2, arg);
} else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
inst.SetArg(4, arg);

View File

@ -1,11 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Optimization {
@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
};
switch (stage) {
case Stage::Local: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU32: {
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
u32 offset = 0;
const auto* addr = inst.Arg(0).InstRecursive();
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
IR::Value data = inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4;
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4;
}
inst.Invalidate();
break;
}
default:
break;
}
});
break;
}
case Stage::Export: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
break;
}
@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::LoadBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
break;
}
const auto shl_inst = inst.Arg(1).TryInstRecursive();
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
const auto bucket = offset.Resolve().U32() / 256u;
const auto attrib = bucket < 4 ? IR::Attribute::Position0
@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
break;
}
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
break;
}

View File

@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::GetUserData:
info.ud_mask.Set(inst.Arg(0).ScalarReg());
break;
case IR::Opcode::SetPatch: {
const auto patch = inst.Arg(0).Patch();
if (patch <= IR::Patch::TessellationLodBottom) {
info.stores_tess_level_outer = true;
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
info.stores_tess_level_inner = true;
} else {
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
}
break;
}
case IR::Opcode::GetPatch: {
const auto patch = inst.Arg(0).Patch();
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
break;
}
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU32:

View File

@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
namespace Shader::Optimization {
// Inserts a barrier at the merge block of every outermost `if` whose condition depends on
// the LocalInvocationId attribute. The barrier is placed after the phi instructions of the
// merge block. Does nothing unless the program uses shared memory and the profile asks for
// LDS barriers.
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
    const bool barriers_wanted = program.info.uses_shared && profile.needs_lds_barriers;
    if (!barriers_wanted) {
        return;
    }
    using Type = IR::AbstractSyntaxNode::Type;

    // Search predicate: stops at a read of the LocalInvocationId attribute
    const auto reads_local_invocation_id = [](IR::Inst* inst) -> std::optional<bool> {
        if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
            inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
            return true;
        }
        return std::nullopt;
    };

    u32 open_ifs{};
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        switch (node.type) {
        case Type::EndIf:
            --open_ifs;
            break;
        case Type::If: {
            const bool is_outermost = open_ifs == 0;
            ++open_ifs;
            if (!is_outermost) {
                // Nested branches are not considered
                break;
            }
            const IR::U1 cond = node.data.if_node.cond;
            if (!IR::BreadthFirstSearch(cond, reads_local_invocation_id)) {
                break;
            }
            IR::Block* const merge = node.data.if_node.merge;
            auto first_non_phi = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
            IR::IREmitter ir{*merge, first_non_phi};
            ir.Barrier();
            break;
        }
        default:
            break;
        }
    }
}
} // namespace Shader::Optimization

View File

@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/patch.h"
namespace Shader::IR {
// Returns the printable name of a patch: the enumerator name for the six tessellation level
// entries, and "Component<N>" (N counted from Patch::Component0) for everything else.
std::string NameOf(Patch patch) {
    // Indexed by enumerator value; the tessellation levels are the enumerators that precede
    // Patch::Component0.
    static constexpr const char* tess_level_names[] = {
        "TessellationLodLeft",      "TessellationLodTop",
        "TessellationLodRight",     "TessellationLodBottom",
        "TessellationLodInteriorU", "TessellationLodInteriorV",
    };
    if (patch < Patch::Component0) {
        return tess_level_names[static_cast<u64>(patch)];
    }
    const u32 index = u32(patch) - u32(Patch::Component0);
    return fmt::format("Component{}", index);
}
} // namespace Shader::IR

View File

@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <fmt/format.h>
#include "common/types.h"
namespace Shader::IR {
// Identifies a per-patch tessellation value.
// The first six enumerators are the tessellation level entries; they are followed by 120
// generic component slots. The helper functions in this header treat the generic slots as
// groups of four consecutive components (index = slot / 4, element = slot % 4).
enum class Patch : u64 {
    // Tessellation levels (values 0-5)
    TessellationLodLeft,
    TessellationLodTop,
    TessellationLodRight,
    TessellationLodBottom,
    TessellationLodInteriorU,
    TessellationLodInteriorV,
    // Generic patch components (values 6-125)
    Component0,
    Component1,
    Component2,
    Component3,
    Component4,
    Component5,
    Component6,
    Component7,
    Component8,
    Component9,
    Component10,
    Component11,
    Component12,
    Component13,
    Component14,
    Component15,
    Component16,
    Component17,
    Component18,
    Component19,
    Component20,
    Component21,
    Component22,
    Component23,
    Component24,
    Component25,
    Component26,
    Component27,
    Component28,
    Component29,
    Component30,
    Component31,
    Component32,
    Component33,
    Component34,
    Component35,
    Component36,
    Component37,
    Component38,
    Component39,
    Component40,
    Component41,
    Component42,
    Component43,
    Component44,
    Component45,
    Component46,
    Component47,
    Component48,
    Component49,
    Component50,
    Component51,
    Component52,
    Component53,
    Component54,
    Component55,
    Component56,
    Component57,
    Component58,
    Component59,
    Component60,
    Component61,
    Component62,
    Component63,
    Component64,
    Component65,
    Component66,
    Component67,
    Component68,
    Component69,
    Component70,
    Component71,
    Component72,
    Component73,
    Component74,
    Component75,
    Component76,
    Component77,
    Component78,
    Component79,
    Component80,
    Component81,
    Component82,
    Component83,
    Component84,
    Component85,
    Component86,
    Component87,
    Component88,
    Component89,
    Component90,
    Component91,
    Component92,
    Component93,
    Component94,
    Component95,
    Component96,
    Component97,
    Component98,
    Component99,
    Component100,
    Component101,
    Component102,
    Component103,
    Component104,
    Component105,
    Component106,
    Component107,
    Component108,
    Component109,
    Component110,
    Component111,
    Component112,
    Component113,
    Component114,
    Component115,
    Component116,
    Component117,
    Component118,
    Component119,
};
// Guards against an enumerator being added or removed by accident
static_assert(static_cast<u64>(Patch::Component119) == 125);
// True when `patch` is one of the generic component slots (Component0..Component119).
constexpr bool IsGeneric(Patch patch) noexcept {
    const bool below_generic_range = patch < Patch::Component0;
    const bool above_generic_range = patch > Patch::Component119;
    return !(below_generic_range || above_generic_range);
}
// Converts a raw index directly into the Patch enumerator with that value. No range check.
constexpr Patch PatchFactor(u32 index) {
    const u64 raw_value = index;
    return static_cast<Patch>(raw_value);
}
// Returns the generic component slot `index`, counted from Patch::Component0. No range check.
constexpr Patch PatchGeneric(u32 index) {
    const u32 first_generic = static_cast<u32>(Patch::Component0);
    const u32 raw_value = first_generic + index;
    return static_cast<Patch>(raw_value);
}
// Index of the four-component group that contains the generic slot `patch`.
constexpr u32 GenericPatchIndex(Patch patch) {
    const u32 first_generic = static_cast<u32>(Patch::Component0);
    const u32 slot = static_cast<u32>(patch) - first_generic;
    return slot / 4;
}
// Position (0-3) of the generic slot `patch` within its four-component group.
constexpr u32 GenericPatchElement(Patch patch) {
    const u32 first_generic = static_cast<u32>(Patch::Component0);
    const u32 slot = static_cast<u32>(patch) - first_generic;
    return slot % 4;
}
[[nodiscard]] std::string NameOf(Patch patch);
} // namespace Shader::IR
/// fmt integration for Shader::IR::Patch: a patch is printed as the string returned by
/// Shader::IR::NameOf().
template <>
struct fmt::formatter<Shader::IR::Patch> {
    /// No format specifiers are interpreted; the parse position is returned unchanged.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    /// Writes the name of `patch` to the output iterator of `ctx`.
    auto format(const Shader::IR::Patch patch, format_context& ctx) const {
        const auto name = Shader::IR::NameOf(patch);
        return fmt::format_to(ctx.out(), "{}", name);
    }
};

View File

@ -0,0 +1,127 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::Optimiation::PatternMatching {
// Attempt at pattern matching for Insts and Values
// Needs improvement, mostly a convenience
/// CRTP base shared by all matchers: Match() is forwarded to the `Derived` implementation.
template <typename Derived>
struct MatchObject {
    inline bool Match(IR::Value v) {
        Derived& self = *static_cast<Derived*>(this);
        return self.Match(v);
    }
};
/// Matcher that accepts any value and copies it into the caller-provided reference.
/// The referenced IR::Value must outlive this matcher.
struct MatchValue : MatchObject<MatchValue> {
    MatchValue(IR::Value& dest) : captured(dest) {}

    /// Always succeeds; `v` is stored in the bound reference.
    inline bool Match(IR::Value v) {
        captured = v;
        return true;
    }

private:
    IR::Value& captured;
};
/// Wildcard matcher: accepts any value and records nothing.
struct MatchIgnore : MatchObject<MatchIgnore> {
    MatchIgnore() {}

    /// Always succeeds; the argument is not inspected.
    inline bool Match(IR::Value) {
        return true;
    }
};
/// Matcher that accepts only immediate values and copies the matched value into the
/// caller-provided reference. The referenced IR::Value must outlive this matcher.
struct MatchImm : MatchObject<MatchImm> {
    MatchImm(IR::Value& v) : captured(v) {}

    /// Succeeds iff `v.IsImmediate()`; the bound reference is only written on success.
    inline bool Match(IR::Value v) {
        const bool is_immediate = v.IsImmediate();
        if (is_immediate) {
            captured = v;
        }
        return is_immediate;
    }

private:
    IR::Value& captured;
};
/// Matcher that accepts a value only if it is of type Attribute and equals the expected
/// attribute given at construction.
struct MatchAttribute : MatchObject<MatchAttribute> {
    MatchAttribute(IR::Attribute attribute_) : expected(attribute_) {}

    inline bool Match(IR::Value v) {
        // Check the type tag first; Attribute() is only read for attribute-typed values.
        if (v.Type() != IR::Type::Attribute) {
            return false;
        }
        return v.Attribute() == expected;
    }

private:
    IR::Attribute expected;
};
/// Matcher that accepts only an immediate of type U32 whose value equals the constant
/// given at construction.
struct MatchU32 : MatchObject<MatchU32> {
    MatchU32(u32 imm_) : expected(imm_) {}

    inline bool Match(IR::Value v) {
        // Same evaluation order as a chained &&: immediate check, then type, then value.
        if (!v.IsImmediate()) {
            return false;
        }
        if (v.Type() != IR::Type::U32) {
            return false;
        }
        return v.U32() == expected;
    }

private:
    u32 expected;
};
/// Matcher for an instruction with a specific opcode whose arguments each satisfy a
/// sub-matcher.
///
/// `Args` are the sub-matcher types, one per instruction argument (enforced by the
/// static_assert). The sub-matchers are held BY REFERENCE: a MatchInstObject must not
/// outlive the matcher objects it was constructed from. Building a pattern and calling
/// Match() within a single full-expression is safe; storing a pattern built from
/// temporaries is not.
template <IR::Opcode opcode, typename... Args>
struct MatchInstObject : MatchObject<MatchInstObject<opcode, Args...>> {
    // The CRTP base must name this exact specialization (including Args...), otherwise
    // MatchObject::Match would static_cast to an unrelated type.
    static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));

    MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}

    /// Succeeds iff `v` resolves (via TryInstRecursive) to an instruction with `opcode`
    /// and every sub-matcher accepts the corresponding argument. Sub-matchers are
    /// evaluated left to right and evaluation stops at the first failure.
    inline bool Match(IR::Value v) {
        IR::Inst* inst = v.TryInstRecursive();
        if (!inst || inst->GetOpcode() != opcode) {
            return false;
        }
        return [&]<std::size_t... Is>(std::index_sequence<Is...>) {
            // A fold over && short-circuits and yields true for an empty pack.
            return (std::get<Is>(pattern).Match(inst->Arg(Is)) && ...);
        }(std::make_index_sequence<sizeof...(Args)>{});
    }

private:
    using MatchArgs = std::tuple<Args&...>;
    MatchArgs pattern;
};
/// Builds a MatchInstObject for `opcode`, deducing the sub-matcher types from `args`.
/// The returned pattern refers to `args` rather than copying them, so use it before the
/// arguments go out of scope.
template <IR::Opcode opcode, typename... Args>
inline auto MakeInstPattern(Args&&... args) {
    using Pattern = MatchInstObject<opcode, Args...>;
    return Pattern(std::forward<Args>(args)...);
}
// Conveniences. TODO probably simpler way of doing this
// Each M_<OPCODE>(...) macro expands to MakeInstPattern<IR::Opcode::<Opcode>>(...), i.e. a
// pattern for that opcode whose arguments are the sub-matchers passed to the macro.
// The macros are not namespaced; they rely on `IR` and `MakeInstPattern` being visible
// at the point of use.
#define M_READCONST(...) MakeInstPattern<IR::Opcode::ReadConst>(__VA_ARGS__)
#define M_GETUSERDATA(...) MakeInstPattern<IR::Opcode::GetUserData>(__VA_ARGS__)
#define M_BITFIELDUEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldUExtract>(__VA_ARGS__)
#define M_BITFIELDSEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldSExtract>(__VA_ARGS__)
#define M_GETATTRIBUTEU32(...) MakeInstPattern<IR::Opcode::GetAttributeU32>(__VA_ARGS__)
#define M_UMOD32(...) MakeInstPattern<IR::Opcode::UMod32>(__VA_ARGS__)
#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern<IR::Opcode::ShiftRightLogical32>(__VA_ARGS__)
#define M_IADD32(...) MakeInstPattern<IR::Opcode::IAdd32>(__VA_ARGS__)
#define M_IMUL32(...) MakeInstPattern<IR::Opcode::IMul32>(__VA_ARGS__)
#define M_BITWISEAND32(...) MakeInstPattern<IR::Opcode::BitwiseAnd32>(__VA_ARGS__)
#define M_GETTESSGENERICATTRIBUTE(...) \
MakeInstPattern<IR::Opcode::GetTessGenericAttribute>(__VA_ARGS__)
#define M_SETTCSGENERICATTRIBUTE(...) \
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
#define M_COMPOSITECONSTRUCTU32X2(...) \
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
#define M_COMPOSITECONSTRUCTU32X4(...) \
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
} // namespace Shader::Optimiation::PatternMatching

View File

@ -40,7 +40,8 @@ union TextureInstInfo {
BitField<6, 2, u32> gather_comp;
BitField<8, 1, u32> has_derivatives;
BitField<9, 1, u32> is_array;
BitField<10, 1, u32> is_gather;
BitField<10, 1, u32> is_unnormalized;
BitField<11, 1, u32> is_gather;
};
union BufferInstInfo {
@ -48,7 +49,8 @@ union BufferInstInfo {
BitField<0, 1, u32> index_enable;
BitField<1, 1, u32> offset_enable;
BitField<2, 12, u32> inst_offset;
BitField<14, 1, u32> ring_access; // global + system coherency
BitField<14, 1, u32> system_coherent;
BitField<15, 1, u32> globally_coherent;
};
enum class ScalarReg : u32 {

View File

@ -15,7 +15,7 @@ enum class Type {
ScalarReg = 1 << 1,
VectorReg = 1 << 2,
Attribute = 1 << 3,
SystemValue = 1 << 4,
Patch = 1 << 4,
U1 = 1 << 5,
U8 = 1 << 6,
U16 = 1 << 7,

View File

@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}

View File

@ -16,6 +16,7 @@
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
@ -34,6 +35,7 @@ public:
explicit Value(IR::ScalarReg reg) noexcept;
explicit Value(IR::VectorReg reg) noexcept;
explicit Value(IR::Attribute value) noexcept;
explicit Value(IR::Patch patch) noexcept;
explicit Value(bool value) noexcept;
explicit Value(u8 value) noexcept;
explicit Value(u16 value) noexcept;
@ -56,6 +58,7 @@ public:
[[nodiscard]] IR::ScalarReg ScalarReg() const;
[[nodiscard]] IR::VectorReg VectorReg() const;
[[nodiscard]] IR::Attribute Attribute() const;
[[nodiscard]] IR::Patch Patch() const;
[[nodiscard]] bool U1() const;
[[nodiscard]] u8 U8() const;
[[nodiscard]] u16 U16() const;
@ -75,6 +78,7 @@ private:
IR::ScalarReg sreg;
IR::VectorReg vreg;
IR::Attribute attribute;
IR::Patch patch;
bool imm_u1;
u8 imm_u8;
u16 imm_u16;
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
return attribute;
}
// Returns the patch stored in this value.
// DEBUG_ASSERT checks that the value's type tag is Type::Patch before the member is read.
inline IR::Patch Value::Patch() const {
DEBUG_ASSERT(type == Type::Patch);
return patch;
}
inline bool Value::U1() const {
if (IsIdentity()) {
return inst->Arg(0).U1();

View File

@ -23,9 +23,11 @@ struct Profile {
bool support_fp32_denorm_flush{};
bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};
bool needs_lds_barriers{};
u64 min_ssbo_alignment{};
};

View File

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
}
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile) {
RuntimeInfo& runtime_info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
if (code[0] != token_mov_vcchi) {
@ -60,17 +63,35 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
// Run optimization passes
const auto stage = program.info.stage;
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);
if (info.l_stage == LogicalStage::TessellationControl) {
// Tess passes require previous const prop passes for now (for simplicity). TODO allow
// fine grained folding or opportunistic folding we set an operand to an immediate
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::HullShaderTransform(program, runtime_info);
} else if (info.l_stage == LogicalStage::TessellationEval) {
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::DomainShaderTransform(program, runtime_info);
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
if (program.info.stage != Stage::Compute) {
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);
Shader::Optimization::SharedMemoryBarrierPass(program, profile);
return program;
}

View File

@ -28,6 +28,6 @@ struct Pools {
};
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile);
RuntimeInfo& runtime_info, const Profile& profile);
} // namespace Shader

View File

@ -7,6 +7,7 @@
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "shader_recompiler/frontend/tessellation.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/types.h"
@ -21,12 +22,31 @@ enum class Stage : u32 {
Local,
Compute,
};
constexpr u32 MaxStageTypes = 7;
// Logical (API-level) shader stage, as opposed to the hardware `Stage` enum.
// Vertex intentionally comes after TCS/TES due to order of compilation
enum class LogicalStage : u32 {
Fragment,
TessellationControl,
TessellationEval,
Vertex,
Geometry,
Compute,
// Count sentinel, not a real stage; keep it last so it equals the number of stages.
NumLogicalStages
};
constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
/// Converts a zero-based index to the `Stage` enumerator with the same numeric value.
/// No range checking is performed.
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
    const auto stage = static_cast<Stage>(index);
    return stage;
}
// Runtime information for the Local (LS) stage; compared member-wise via the defaulted
// three-way comparison.
struct LocalRuntimeInfo {
// NOTE(review): presumably the per-vertex LS output stride; units not visible here — confirm
u32 ls_stride;
// NOTE(review): presumably set when this LS feeds a tessellation control shader — confirm
bool links_with_tcs;
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};
struct ExportRuntimeInfo {
u32 vertex_data_size;
@ -64,9 +84,57 @@ struct VertexRuntimeInfo {
u32 num_outputs;
std::array<VsOutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
// Domain
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
u32 hs_output_cp_stride{};
bool operator==(const VertexRuntimeInfo& other) const noexcept {
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
tess_type == other.tess_type && tess_topology == other.tess_topology &&
tess_partitioning == other.tess_partitioning &&
hs_output_cp_stride == other.hs_output_cp_stride;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
hs_output_cp_stride = tess_constants.hs_cp_stride;
}
};
struct HullRuntimeInfo {
// from registers
u32 num_input_control_points;
u32 num_threads;
AmdGpu::TessellationType tess_type;
// from tess constants buffer
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_output_base;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
// It might be possible for a non-passthrough TCS to have these conditions, in some
// dumb situation.
// In that case, it should be fine to assume passthrough and declare some extra
// output control points and attributes that shouldnt be read by the TES anyways
bool IsPassthrough() const {
return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
};
// regs.ls_hs_config.hs_output_control_points contains the number of threads, which
// isn't exactly the number of output control points.
// For passthrough shaders, the register field is set to 1, so use the number of
// input control points
u32 NumOutputControlPoints() const {
return IsPassthrough() ? num_input_control_points : num_threads;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
};
@ -150,8 +218,10 @@ struct RuntimeInfo {
AmdGpu::FpDenormMode fp_denorm_mode32;
AmdGpu::FpRoundMode fp_round_mode32;
union {
LocalRuntimeInfo ls_info;
ExportRuntimeInfo es_info;
VertexRuntimeInfo vs_info;
HullRuntimeInfo hs_info;
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
@ -174,6 +244,10 @@ struct RuntimeInfo {
return es_info == other.es_info;
case Stage::Geometry:
return gs_info == other.gs_info;
case Stage::Hull:
return hs_info == other.hs_info;
case Stage::Local:
return ls_info == other.ls_info;
default:
return true;
}

View File

@ -31,6 +31,7 @@ struct BufferSpecialization {
// Per-texture-buffer part of a shader specialization key; compared member-wise.
struct TextureBufferSpecialization {
// Filled from AmdGpu::IsInteger(sharp.GetNumberFmt()) when the key is built.
bool is_integer = false;
// Filled from sharp.DstSelect() when the key is built.
u32 dst_select = 0;
auto operator<=>(const TextureBufferSpecialization&) const = default;
};
@ -38,8 +39,12 @@ struct TextureBufferSpecialization {
struct ImageSpecialization {
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
bool is_integer = false;
u32 dst_select = 0;
auto operator<=>(const ImageSpecialization&) const = default;
bool operator==(const ImageSpecialization& other) const {
return type == other.type && is_integer == other.is_integer &&
(dst_select != 0 ? dst_select == other.dst_select : true);
}
};
struct FMaskSpecialization {
@ -49,6 +54,12 @@ struct FMaskSpecialization {
auto operator<=>(const FMaskSpecialization&) const = default;
};
// Per-sampler part of a shader specialization key; compared member-wise.
struct SamplerSpecialization {
// Filled from the bound sampler sharp's `force_unnormalized` field when the key is built.
bool force_unnormalized = false;
auto operator<=>(const SamplerSpecialization&) const = default;
};
/**
* Alongside runtime information, this structure also checks bound resources
* for compatibility. Can be used as a key for storing shader permutations.
@ -67,6 +78,7 @@ struct StageSpecialization {
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
boost::container::small_vector<SamplerSpecialization, 16> samplers;
Backend::Bindings start{};
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
@ -96,17 +108,37 @@ struct StageSpecialization {
ForEachSharp(binding, tex_buffers, info->texture_buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect();
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetBoundType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
if (desc.is_storage) {
spec.dst_select = sharp.DstSelect();
}
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.width = sharp.width;
spec.height = sharp.height;
});
ForEachSharp(samplers, info->samplers,
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
spec.force_unnormalized = sharp.force_unnormalized;
});
// Initialize runtime_info fields that rely on analysis in tessellation passes
if (info->l_stage == LogicalStage::TessellationControl ||
info->l_stage == LogicalStage::TessellationEval) {
Shader::TessellationDataConstantBuffer tess_constants;
info->ReadTessConstantBuffer(tess_constants);
if (info->l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
}
}
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
@ -175,6 +207,11 @@ struct StageSpecialization {
return false;
}
}
for (u32 i = 0; i < samplers.size(); i++) {
if (samplers[i] != other.samplers[i]) {
return false;
}
}
return true;
}
};

View File

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/preprocessor/stringize.hpp>
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
@ -18,7 +20,32 @@ namespace AmdGpu {
static const char* dcb_task_name{"DCB_TASK"};
static const char* ccb_task_name{"CCB_TASK"};
static const char* acb_task_name{"ACB_TASK"};
// Number of per-queue fiber names generated below; must cover every compute ring.
#define MAX_NAMES 56
static_assert(Liverpool::NumComputeRings <= MAX_NAMES);
// BOOST_PP_REPEAT callback: emits the string literal "<name><n>" followed by a comma
// (the two stringized pieces are adjacent literals and are concatenated by the compiler).
#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n),
// Brace-enclosed initializer list of `num` names: "<name>0", "<name>1", ...
#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)}
// One fiber name per compute queue, indexed by the queue id.
static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
// Suspends the current coroutine, leaving the profiler fiber before the yield and
// re-entering it as `name` on resumption.
// NOTE: expands to several statements; only use inside a braced block.
#define YIELD(name) \
FIBER_EXIT; \
co_yield {}; \
FIBER_ENTER(name);
#define YIELD_CE() YIELD(ccb_task_name)
#define YIELD_GFX() YIELD(dcb_task_name)
#define YIELD_ASC(id) YIELD(acb_task_name[id])
// Resumes a nested task from the current fiber: leaves the current fiber, resumes
// `task`, then re-enters the current fiber as `name`.
// NOTE: expands to several statements; only use inside a braced block.
#define RESUME(task, name) \
FIBER_EXIT; \
task.handle.resume(); \
FIBER_ENTER(name);
#define RESUME_CE(task) RESUME(task, ccb_task_name)
#define RESUME_GFX(task) RESUME(task, dcb_task_name)
#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id])
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) {
VideoCore::StartCapture();
int qid = -1;
curr_qid = -1;
while (num_submits || num_commands) {
@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) {
--num_commands;
}
qid = (qid + 1) % NumTotalQueues;
curr_qid = (curr_qid + 1) % num_mapped_queues;
auto& queue = mapped_queues[qid];
auto& queue = mapped_queues[curr_qid];
Task::Handle task{};
{
@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) {
}
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
TracyFiberEnter(ccb_task_name);
FIBER_ENTER(ccb_task_name);
while (!ccb.empty()) {
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
@ -155,9 +182,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
case PM4ItOpcode::WaitOnDeCounterDiff: {
const auto diff = it_body[0];
while ((cblock.de_count - cblock.ce_count) >= diff) {
TracyFiberLeave;
co_yield {};
TracyFiberEnter(ccb_task_name);
YIELD_CE();
}
break;
}
@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
auto task =
ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
while (!task.handle.done()) {
task.handle.resume();
RESUME_CE(task);
TracyFiberLeave;
co_yield {};
TracyFiberEnter(ccb_task_name);
};
while (!task.handle.done()) {
YIELD_CE();
RESUME_CE(task);
}
break;
}
default:
@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
ccb = NextPacket(ccb, header->type3.NumWords() + 1);
}
TracyFiberLeave;
FIBER_EXIT;
}
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
TracyFiberEnter(dcb_task_name);
FIBER_ENTER(dcb_task_name);
cblock.Reset();
@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
if (!ccb.empty()) {
// In case of CCB provided kick off CE asap to have the constant heap ready to use
ce_task = ProcessCeUpdate(ccb);
TracyFiberLeave;
ce_task.handle.resume();
TracyFiberEnter(dcb_task_name);
RESUME_GFX(ce_task);
}
const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
const auto set_size = (count - 1) * sizeof(u32);
if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
set_size);
}
break;
}
case PM4ItOpcode::SetUconfigReg: {
@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::DispatchDirect: {
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
regs.cs_program.dim_x = dispatch_direct->dim_x;
regs.cs_program.dim_y = dispatch_direct->dim_y;
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
auto& cs_program = GetCsRegs();
cs_program.dim_x = dispatch_direct->dim_x;
cs_program.dim_y = dispatch_direct->dim_y;
cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
rasterizer->DispatchDirect();
@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::DispatchIndirect: {
const auto* dispatch_indirect =
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
auto& cs_program = GetCsRegs();
const auto offset = dispatch_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::Rewind: {
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
while (!rewind->Valid()) {
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(dcb_task_name);
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
YIELD_GFX();
}
break;
}
@ -629,15 +659,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// there are no other submits to yield to we can sleep the thread
// instead and allow other tasks to run.
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
if (vo_port->IsVoLabel(wait_addr) &&
num_submits == mapped_queues[GfxQueueId].submits.size()) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
}
while (!wait_reg_mem->Test()) {
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(dcb_task_name);
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
YIELD_GFX();
}
break;
}
@ -645,13 +672,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
auto task = ProcessGraphics(
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
while (!task.handle.done()) {
task.handle.resume();
RESUME_GFX(task);
TracyFiberLeave;
co_yield {};
TracyFiberEnter(dcb_task_name);
};
while (!task.handle.done()) {
YIELD_GFX();
RESUME_GFX(task);
}
break;
}
case PM4ItOpcode::IncrementDeCounter: {
@ -660,9 +686,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::WaitOnCeCounter: {
while (cblock.ce_count <= cblock.de_count) {
TracyFiberLeave;
ce_task.handle.resume();
TracyFiberEnter(dcb_task_name);
RESUME_GFX(ce_task);
}
break;
}
@ -686,11 +710,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
ce_task.handle.destroy();
}
TracyFiberLeave;
FIBER_EXIT;
}
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
TracyFiberEnter(acb_task_name);
template <bool is_indirect>
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
FIBER_ENTER(acb_task_name[vqid]);
const auto& queue = asc_queues[{vqid}];
auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
while (!acb.empty()) {
@ -711,15 +737,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
}
case PM4ItOpcode::IndirectBuffer: {
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
auto task = ProcessCompute(
auto task = ProcessCompute<true>(
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
while (!task.handle.done()) {
task.handle.resume();
RESUME_ASC(task, vqid);
TracyFiberLeave;
co_yield {};
TracyFiberEnter(acb_task_name);
};
while (!task.handle.done()) {
YIELD_ASC(vqid);
RESUME_ASC(task, vqid);
}
break;
}
case PM4ItOpcode::DmaData: {
@ -757,30 +782,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
case PM4ItOpcode::Rewind: {
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
while (!rewind->Valid()) {
mapped_queues[vqid].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(acb_task_name);
regs.cs_program = mapped_queues[vqid].cs_state;
YIELD_ASC(vqid);
}
break;
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
const auto set_size = (count - 1) * sizeof(u32);
if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
set_size);
}
break;
}
case PM4ItOpcode::DispatchDirect: {
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
regs.cs_program.dim_x = dispatch_direct->dim_x;
regs.cs_program.dim_y = dispatch_direct->dim_y;
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
auto& cs_program = GetCsRegs();
cs_program.dim_x = dispatch_direct->dim_x;
cs_program.dim_y = dispatch_direct->dim_y;
cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs, true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
rasterizer->DispatchDirect();
@ -803,17 +836,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
while (!wait_reg_mem->Test()) {
mapped_queues[vqid].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(acb_task_name);
regs.cs_program = mapped_queues[vqid].cs_state;
YIELD_ASC(vqid);
}
break;
}
case PM4ItOpcode::ReleaseMem: {
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
break;
}
default:
@ -821,10 +850,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
static_cast<u32>(opcode), count);
}
acb = NextPacket(acb, header->type3.NumWords() + 1);
const auto packet_size_dw = header->type3.NumWords() + 1;
acb = NextPacket(acb, packet_size_dw);
if constexpr (!is_indirect) {
*queue.read_addr += packet_size_dw;
*queue.read_addr %= queue.ring_size_dw;
}
}
TracyFiberLeave;
FIBER_EXIT;
}
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
@ -881,10 +916,11 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
submit_cv.notify_one();
}
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
auto& queue = mapped_queues[vqid];
void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
auto& queue = mapped_queues[gnm_vqid];
const auto vqid = gnm_vqid - 1;
const auto& task = ProcessCompute(acb, vqid);
{
std::scoped_lock lock{queue.m_access};
@ -892,6 +928,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
}
std::scoped_lock lk{submit_mutex};
num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
++num_submits;
submit_cv.notify_one();
}

View File

@ -16,6 +16,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/polyfill_thread.h"
#include "common/slot_vector.h"
#include "common/types.h"
#include "common/unique_function.h"
#include "shader_recompiler/params.h"
@ -45,7 +46,8 @@ struct Liverpool {
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
static constexpr u32 NumQueuesPerPipe = 8u;
static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe);
static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe;
static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings;
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
static constexpr u32 NumColorBuffers = 8;
@ -143,6 +145,13 @@ struct Liverpool {
}
};
// Pair of float registers holding the hull shader tessellation factor clamp values.
// The maximum is stored first, then the minimum.
struct HsTessFactorClamp {
// I've only seen min=0.0, max=1.0 so far.
// TODO why is max set to 1.0? Makes no sense
float hs_max_tess;
float hs_min_tess;
};
struct ComputeProgram {
u32 dispatch_initiator;
u32 dim_x;
@ -431,6 +440,10 @@ struct Liverpool {
return u64(z_read_base) << 8;
}
/// Returns `stencil_read_base` widened to 64 bits and shifted left by 8.
u64 StencilAddress() const {
    const auto base = static_cast<u64>(stencil_read_base);
    return base << 8;
}
u32 NumSamples() const {
return 1u << z_info.num_samples; // spec doesn't say it is a log2
}
@ -952,6 +965,7 @@ struct Liverpool {
enum VgtStages : u32 {
Vs = 0u, // always enabled
EsGs = 0xB0u,
LsHs = 0x45u,
};
VgtStages raw;
@ -959,7 +973,8 @@ struct Liverpool {
BitField<2, 1, u32> hs_en;
BitField<3, 2, u32> es_en;
BitField<5, 1, u32> gs_en;
BitField<6, 1, u32> vs_en;
BitField<6, 2, u32> vs_en;
BitField<8, 1, u32> dynamic_hs;
bool IsStageEnabled(u32 stage) const {
switch (stage) {
@ -1055,6 +1070,28 @@ struct Liverpool {
};
};
// LS/HS (tessellation) configuration register, viewable either as the raw
// 32-bit word or through its bit-fields.
// NOTE(review): BitField arguments are presumably <first bit, width, type> — confirm.
union LsHsConfig {
u32 raw; // whole register word
BitField<0, 8, u32> num_patches; // 8-bit field starting at bit 0
BitField<8, 6, u32> hs_input_control_points; // 6-bit field starting at bit 8
BitField<14, 6, u32> hs_output_control_points; // 6-bit field starting at bit 14
};
// Tessellator configuration register, viewable either as the raw 32-bit word
// or as three enum-typed bit-fields (type, partitioning, topology).
// NOTE(review): BitField arguments are presumably <first bit, width, type> — confirm.
union TessellationConfig {
u32 raw; // whole register word
BitField<0, 2, TessellationType> type; // 2-bit field starting at bit 0
BitField<2, 3, TessellationPartitioning> partitioning; // 3-bit field starting at bit 2
BitField<5, 3, TessellationTopology> topology; // 3-bit field starting at bit 5
};
/// Register holding the base of the tessellation factor memory.
union TessFactorMemoryBase {
    u32 base; ///< Raw register value.

    /// Widens `base` to 64 bits and shifts it left by 8, yielding the full address value.
    /// NOTE(review): presumably the register is expressed in 256-byte units — confirm.
    u64 MemoryBase() const {
        const u64 widened = base;
        return widened << 8;
    }
};
union Eqaa {
u32 raw;
BitField<0, 1, u32> max_anchor_samples;
@ -1105,10 +1142,10 @@ struct Liverpool {
ShaderProgram es_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram hs_program;
INSERT_PADDING_WORDS(0x2C);
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program;
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
@ -1172,7 +1209,9 @@ struct Liverpool {
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
HsTessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
GsMode vgt_gs_mode;
INSERT_PADDING_WORDS(1);
ModeControl mode_control;
@ -1196,9 +1235,10 @@ struct Liverpool {
BitField<0, 11, u32> vgt_gs_max_vert_out;
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
ShaderStageEnable stage_enable;
INSERT_PADDING_WORDS(1);
LsHsConfig ls_hs_config;
u32 vgt_gs_vert_itemsize[4];
INSERT_PADDING_WORDS(4);
TessellationConfig tess_config;
INSERT_PADDING_WORDS(3);
PolygonOffset poly_offset;
GsInstances vgt_gs_instance_cnt;
StreamOutConfig vgt_strmout_config;
@ -1212,6 +1252,8 @@ struct Liverpool {
INSERT_PADDING_WORDS(0xC24C - 0xC243);
u32 num_indices;
VgtNumInstances num_instances;
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
TessFactorMemoryBase vgt_tf_memory_base;
};
std::array<u32, NumRegs> reg_array{};
@ -1258,7 +1300,7 @@ public:
~Liverpool();
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
void SubmitAsc(u32 vqid, std::span<const u32> acb);
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
void SubmitDone() noexcept {
std::scoped_lock lk{submit_mutex};
@ -1301,6 +1343,18 @@ public:
gfx_queue.dcb_buffer.reserve(GfxReservedSize);
}
/// Returns the compute program state (`cs_state`) of the queue selected by `curr_qid`.
/// NOTE(review): `curr_qid` is default-initialised to -1 in this class; presumably this is
/// only called while a queue is being processed — confirm against callers.
inline ComputeProgram& GetCsRegs() {
    auto& active_queue = mapped_queues[curr_qid];
    return active_queue.cs_state;
}
// Per-queue bookkeeping record; instances are stored in the `asc_queues` slot vector.
// NOTE(review): field meanings below are inferred from the names only — confirm with the
// code that maps compute queues.
struct AscQueueInfo {
VAddr map_addr; // presumably the guest address the ring is mapped at
u32* read_addr; // presumably points at the queue's read pointer
u32 ring_size_dw; // presumably the ring size in dwords
u32 pipe_id; // presumably the compute pipe the queue belongs to
};
Common::SlotVector<AscQueueInfo> asc_queues{};
private:
struct Task {
struct promise_type {
@ -1338,7 +1392,8 @@ private:
std::span<const u32> ccb);
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
Task ProcessCeUpdate(std::span<const u32> ccb);
Task ProcessCompute(std::span<const u32> acb, int vqid);
template <bool is_indirect = false>
Task ProcessCompute(std::span<const u32> acb, u32 vqid);
void Process(std::stop_token stoken);
@ -1353,6 +1408,7 @@ private:
VAddr indirect_args_addr{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
u32 num_mapped_queues{1u}; // GFX is always available
struct ConstantEngine {
void Reset() {
@ -1381,6 +1437,7 @@ private:
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
std::queue<Common::UniqueFunction<void>> command_queue{};
int curr_qid{-1};
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
@ -1427,6 +1484,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
@ -1441,6 +1499,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
@ -1452,6 +1511,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
#undef GFX6_3D_REG_INDEX

View File

@ -52,6 +52,10 @@ struct Buffer {
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
}
/// Packs the four destination selects into one value, 3 bits apart:
/// x in the lowest bits, then y (<< 3), z (<< 6) and w (<< 9).
u32 DstSelect() const {
    const u32 low_pair = dst_sel_x | (dst_sel_y << 3);
    const u32 high_pair = (dst_sel_z << 6) | (dst_sel_w << 9);
    return low_pair | high_pair;
}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
return static_cast<CompSwizzle>(select[comp]);
@ -204,6 +208,11 @@ struct Image {
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
}
/// Returns the destination select of component `comp` (0 = x, 1 = y, 2 = z, 3 = w)
/// converted to `CompSwizzle`. `comp` is used as an unchecked array index.
CompSwizzle GetSwizzle(u32 comp) const noexcept {
    const std::array per_component{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
    const auto raw_select = per_component[comp];
    return static_cast<CompSwizzle>(raw_select);
}
static char SelectComp(u32 sel) {
switch (sel) {
case 0:

View File

@ -3,6 +3,8 @@
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
@ -21,6 +23,69 @@ enum class FpDenormMode : u32 {
InOutAllow = 3,
};
/// Tessellation domain values.
enum class TessellationType : u32 {
    Isoline = 0,
    Triangle = 1,
    Quad = 2,
};

/// Returns the printable name of `type`, or "Unknown" for any value not listed in the enum.
constexpr std::string_view NameOf(TessellationType type) {
    // Indexed by the enumerator value; must stay in declaration order.
    constexpr std::string_view names[] = {"Isoline", "Triangle", "Quad"};
    const auto index = static_cast<u32>(type);
    return index < std::size(names) ? names[index] : std::string_view{"Unknown"};
}
/// Tessellation partitioning modes.
enum class TessellationPartitioning : u32 {
    Integer = 0,
    Pow2 = 1,
    FracOdd = 2,
    FracEven = 3,
};

/// Returns the printable name of `partitioning`, or "Unknown" for any value not listed in
/// the enum.
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
    // Indexed by the enumerator value; must stay in declaration order.
    constexpr std::string_view names[] = {"Integer", "Pow2", "FracOdd", "FracEven"};
    const auto index = static_cast<u32>(partitioning);
    return index < std::size(names) ? names[index] : std::string_view{"Unknown"};
}
/// Tessellator output topologies.
enum class TessellationTopology : u32 {
    Point = 0,
    Line = 1,
    TriangleCw = 2,
    TriangleCcw = 3,
};

/// Returns the printable name of `topology`, or "Unknown" for any value not listed in the
/// enum.
constexpr std::string_view NameOf(TessellationTopology topology) {
    // Indexed by the enumerator value; must stay in declaration order.
    constexpr std::string_view names[] = {"Point", "Line", "TriangleCw", "TriangleCcw"};
    const auto index = static_cast<u32>(topology);
    return index < std::size(names) ? names[index] : std::string_view{"Unknown"};
}
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
None = 0,
@ -118,3 +183,33 @@ enum class NumberFormat : u32 {
};
} // namespace AmdGpu
/// Lets fmt print `AmdGpu::TessellationType` values by name.
template <>
struct fmt::formatter<AmdGpu::TessellationType> {
    /// Parses no format specifiers; hands back the start of the spec range unchanged.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }

    /// Writes the text returned by `AmdGpu::NameOf(type)` to the output.
    auto format(AmdGpu::TessellationType type, format_context& ctx) const {
        const std::string_view name = AmdGpu::NameOf(type);
        return fmt::format_to(ctx.out(), "{}", name);
    }
};
/// Lets fmt print `AmdGpu::TessellationPartitioning` values by name.
template <>
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
    /// Parses no format specifiers; hands back the start of the spec range unchanged.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }

    /// Writes the text returned by `AmdGpu::NameOf(type)` to the output.
    auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
        const std::string_view name = AmdGpu::NameOf(type);
        return fmt::format_to(ctx.out(), "{}", name);
    }
};
/// Lets fmt print `AmdGpu::TessellationTopology` values by name.
template <>
struct fmt::formatter<AmdGpu::TessellationTopology> {
    /// Parses no format specifiers; hands back the start of the spec range unchanged.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }

    /// Writes the text returned by `AmdGpu::NameOf(type)` to the output.
    auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
        const std::string_view name = AmdGpu::NameOf(type);
        return fmt::format_to(ctx.out(), "{}", name);
    }
};

View File

@ -65,6 +65,33 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) {
}
}
/// Classifies a primitive type as culled or not culled.
/// Returns true for triangle, quad, polygon and patch based types; false for `None`,
/// points, lines and rect lists. Any other value hits `UNREACHABLE()` and, should that
/// return, is reported as culled.
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type) {
    bool is_culled = true;
    switch (type) {
    case AmdGpu::PrimitiveType::None:
    case AmdGpu::PrimitiveType::PointList:
    case AmdGpu::PrimitiveType::LineList:
    case AmdGpu::PrimitiveType::LineStrip:
    case AmdGpu::PrimitiveType::AdjLineList:
    case AmdGpu::PrimitiveType::AdjLineStrip:
    case AmdGpu::PrimitiveType::RectList: // Screen-aligned rectangles that are not culled
    case AmdGpu::PrimitiveType::LineLoop:
        is_culled = false;
        break;
    case AmdGpu::PrimitiveType::TriangleList:
    case AmdGpu::PrimitiveType::TriangleFan:
    case AmdGpu::PrimitiveType::TriangleStrip:
    case AmdGpu::PrimitiveType::PatchPrimitive:
    case AmdGpu::PrimitiveType::AdjTriangleList:
    case AmdGpu::PrimitiveType::AdjTriangleStrip:
    case AmdGpu::PrimitiveType::QuadList:
    case AmdGpu::PrimitiveType::QuadStrip:
    case AmdGpu::PrimitiveType::Polygon:
        break;
    default:
        UNREACHABLE();
        break;
    }
    return is_culled;
}
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
switch (type) {
case AmdGpu::PrimitiveType::PointList:
@ -672,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
default:
break;
}
} else if (comp_swap_reverse) {
switch (base_format) {
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eA8B8G8R8UnormPack32;
case vk::Format::eR8G8B8A8Srgb:
return vk::Format::eA8B8G8R8SrgbPack32;
default:
break;
}
}
return base_format;
}

View File

@ -18,6 +18,8 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op);
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type);
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);

View File

@ -3,10 +3,6 @@
#pragma once
#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
#endif
// Include vulkan-hpp header
#define VK_ENABLE_BETA_EXTENSIONS
#define VK_NO_PROTOTYPES

View File

@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ComputePipelineKey compute_key_, const Shader::Info& info_,
vk::ShaderModule module)
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
auto& info = stages[int(Shader::Stage::Compute)];
auto& info = stages[int(Shader::LogicalStage::Compute)];
info = &info_;
const vk::PipelineShaderStageCreateInfo shader_ci = {

View File

@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/scope_exit.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
const auto& vs_info = GetStage(Shader::Stage::Vertex);
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader
@ -106,11 +107,17 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
key.primitive_restart_index == 0xFFFFFFFF,
"Primitive restart index other than -1 is not supported yet");
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
.patchControlPoints = key.patch_control_points,
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
.cullMode = LiverpoolToVK::CullMode(key.cull_mode),
.cullMode = LiverpoolToVK::IsPrimitiveCulled(key.prim_type)
? LiverpoolToVK::CullMode(key.cull_mode)
: vk::CullModeFlagBits::eNone,
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
? vk::FrontFace::eClockwise
: vk::FrontFace::eCounterClockwise,
@ -202,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
shader_stages;
auto stage = u32(Shader::Stage::Vertex);
auto stage = u32(Shader::LogicalStage::Vertex);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eVertex,
@ -210,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pName = "main",
});
}
stage = u32(Shader::Stage::Geometry);
stage = u32(Shader::LogicalStage::Geometry);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eGeometry,
@ -218,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pName = "main",
});
}
stage = u32(Shader::Stage::Fragment);
stage = u32(Shader::LogicalStage::TessellationControl);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationControl,
.module = modules[stage],
.pName = "main",
});
}
stage = u32(Shader::LogicalStage::TessellationEval);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
.module = modules[stage],
.pName = "main",
});
}
stage = u32(Shader::LogicalStage::Fragment);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eFragment,
@ -227,17 +250,15 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
});
}
const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = num_color_formats,
.colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = key.depth_format,
.stencilAttachmentFormat = key.stencil_format,
};
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
for (u32 i = 0; i < num_color_formats; i++) {
for (u32 i = 0; i < key.num_color_attachments; i++) {
const auto& control = key.blend_controls[i];
const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor);
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
@ -290,7 +311,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = false,
.logicOp = vk::LogicOp::eCopy,
.attachmentCount = num_color_formats,
.attachmentCount = key.num_color_attachments,
.pAttachments = attachments.data(),
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
};
@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pStages = shader_stages.data(),
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
.pInputAssemblyState = &input_assembly,
.pTessellationState =
stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
if (!stage) {
continue;
}
if (stage->has_readconst) {
bindings.push_back({
.binding = binding++,

View File

@ -29,6 +29,7 @@ using Liverpool = AmdGpu::Liverpool;
struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
u32 num_color_attachments;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
@ -51,6 +52,7 @@ struct GraphicsPipelineKey {
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points;
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(key)) == 0;
@ -72,7 +74,7 @@ public:
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
}
auto GetWriteMasks() const {

View File

@ -9,6 +9,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -68,11 +69,10 @@ std::unordered_map<vk::Format, vk::FormatProperties3> GetFormatProperties(
}
// Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility
static constexpr std::array misc_formats = {
vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm,
vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint,
vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb,
vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint,
vk::Format::eA2R10G10B10UnormPack32,
vk::Format::eB8G8R8A8Unorm,
vk::Format::eB8G8R8A8Srgb,
vk::Format::eD24UnormS8Uint,
};
for (const auto& format : misc_formats) {
if (!format_properties.contains(format)) {
@ -262,11 +262,13 @@ bool Instance::CreateDevice() {
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
@ -327,6 +329,7 @@ bool Instance::CreateDevice() {
.imageCubeArray = features.imageCubeArray,
.independentBlend = features.independentBlend,
.geometryShader = features.geometryShader,
.tessellationShader = features.tessellationShader,
.logicOp = features.logicOp,
.depthBiasClamp = features.depthBiasClamp,
.fillModeNonSolid = features.fillModeNonSolid,
@ -580,42 +583,22 @@ bool Instance::IsFormatSupported(const vk::Format format,
return (GetFormatFeatureFlags(format) & flags) == flags;
}
/// Maps a format to the substitute tried in its place: B5G6R5 -> R5G6B5 and
/// D16S8 -> D24S8. Every other format is returned unchanged.
static vk::Format GetAlternativeFormat(const vk::Format format) {
    if (format == vk::Format::eB5G6R5UnormPack16) {
        return vk::Format::eR5G6B5UnormPack16;
    }
    if (format == vk::Format::eD16UnormS8Uint) {
        return vk::Format::eD24UnormS8Uint;
    }
    return format;
}
vk::Format Instance::GetSupportedFormat(const vk::Format format,
const vk::FormatFeatureFlags2 flags) const {
if (IsFormatSupported(format, flags)) [[likely]] {
return format;
}
const vk::Format alternative = GetAlternativeFormat(format);
if (IsFormatSupported(alternative, flags)) [[likely]] {
return alternative;
if (!IsFormatSupported(format, flags)) [[unlikely]] {
switch (format) {
case vk::Format::eD16UnormS8Uint:
if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) {
return vk::Format::eD24UnormS8Uint;
}
if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) {
return vk::Format::eD32SfloatS8Uint;
}
default:
break;
}
}
return format;
}
/// Returns `swizzle` adjusted for format substitution. The mapping is returned untouched
/// when `format` supports `flags`; otherwise, for B5G6R5 only, the red and blue entries are
/// exchanged to match its R5G6B5 replacement.
vk::ComponentMapping Instance::GetSupportedComponentSwizzle(
    const vk::Format format, const vk::ComponentMapping swizzle,
    const vk::FormatFeatureFlags2 flags) const {
    vk::ComponentMapping result = swizzle;
    const bool needs_red_blue_swap =
        !IsFormatSupported(format, flags) && format == vk::Format::eB5G6R5UnormPack16;
    if (needs_red_blue_swap) {
        // B5G6R5 -> R5G6B5
        std::swap(result.r, result.b);
    }
    return result;
}
} // namespace Vulkan

View File

@ -33,10 +33,6 @@ public:
[[nodiscard]] vk::Format GetSupportedFormat(vk::Format format,
vk::FormatFeatureFlags2 flags) const;
/// Re-orders a component swizzle for format compatibility, if needed.
[[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle(
vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const;
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return *instance;
@ -158,6 +154,11 @@ public:
return legacy_vertex_attributes;
}
/// Returns true when the VK_AMD_shader_image_load_store_lod extension is supported,
/// as recorded in the `image_load_store_lod` member.
bool IsImageLoadStoreLodSupported() const {
return image_load_store_lod;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -327,6 +328,7 @@ private:
bool maintenance5{};
bool list_restart{};
bool legacy_vertex_attributes{};
bool image_load_store_lod{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};

View File

@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
namespace Vulkan {
using Shader::LogicalStage;
using Shader::Stage;
using Shader::VsOutput;
constexpr static std::array DescriptorHeapSizes = {
@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
auto info = Shader::RuntimeInfo{stage};
const auto& regs = liverpool->regs;
const auto BuildCommon = [&](const auto& program) {
@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
info.fp_round_mode32 = program.settings.fp_round_mode32;
};
switch (stage) {
case Shader::Stage::Export: {
case Stage::Local: {
BuildCommon(regs.ls_program);
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
info.ls_info.links_with_tcs = true;
Shader::TessellationDataConstantBuffer tess_constants;
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
const auto params = Liverpool::GetParams(*pgm);
const auto& hull_info = program_cache.at(params.hash)->info;
hull_info.ReadTessConstantBuffer(tess_constants);
info.ls_info.ls_stride = tess_constants.ls_stride;
}
break;
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.tess_type = regs.tess_config.type;
// We need to initialize most hs_info fields after finding the V# with tess constants
break;
}
case Stage::Export: {
BuildCommon(regs.es_program);
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
break;
}
case Shader::Stage::Vertex: {
case Stage::Vertex: {
BuildCommon(regs.vs_program);
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
if (l_stage == LogicalStage::TessellationEval) {
info.vs_info.tess_type = regs.tess_config.type;
info.vs_info.tess_topology = regs.tess_config.topology;
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
}
break;
}
case Shader::Stage::Geometry: {
case Stage::Geometry: {
BuildCommon(regs.gs_program);
auto& gs_info = info.gs_info;
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
break;
}
case Shader::Stage::Fragment: {
case Stage::Fragment: {
BuildCommon(regs.ps_program);
info.fs_info.en_flags = regs.ps_input_ena;
info.fs_info.addr_flags = regs.ps_input_addr;
@ -143,10 +172,10 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
}
break;
}
case Shader::Stage::Compute: {
const auto& cs_pgm = regs.cs_program;
case Stage::Compute: {
const auto& cs_pgm = liverpool->GetCsRegs();
info.num_user_data = cs_pgm.settings.num_user_regs;
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
@ -172,8 +201,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
};
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
@ -268,6 +299,7 @@ bool PipelineCache::RefreshGraphicsKey() {
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
// attachments. This might not be the case, as HW color buffers can be bound in an arbitrary
// order, so we need to compact the arrays at this stage.
key.num_color_attachments = 0;
key.color_formats.fill(vk::Format::eUndefined);
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
key.blend_controls.fill({});
@ -275,13 +307,26 @@ bool PipelineCache::RefreshGraphicsKey() {
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
key.vertex_buffer_formats.fill(vk::Format::eUndefined);
key.patch_control_points = 0;
if (regs.stage_enable.hs_en.Value()) {
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
}
// First pass of bindings check to identify formats and swizzles and pass them to the shader
// recompiler.
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
if (skip_cb_binding || !col_buf) {
// No attachment bound and no incremented index.
continue;
}
const auto remapped_cb = key.num_color_attachments++;
if (!regs.color_target_mask.GetMask(cb)) {
// Bound to null handle, skip over this attachment index.
continue;
}
const auto base_format =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
key.color_formats[remapped_cb] =
@ -290,14 +335,12 @@ bool PipelineCache::RefreshGraphicsKey() {
if (base_format == key.color_formats[remapped_cb]) {
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
}
++remapped_cb;
}
fetch_shader = std::nullopt;
Shader::Backend::Bindings binding{};
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
const auto stage_in_idx = static_cast<u32>(stage_in);
const auto stage_out_idx = static_cast<u32>(stage_out);
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
@ -324,23 +367,23 @@ bool PipelineCache::RefreshGraphicsKey() {
auto params = Liverpool::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
key.stage_hashes[stage_out_idx]) =
GetProgram(stage_in, stage_out, params, binding);
if (fetch_shader_) {
fetch_shader = fetch_shader_;
}
return true;
};
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
const auto& IsGsFeaturesSupported = [&]() -> bool {
// These checks are temporary until all functionality is implemented.
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
};
TryBindStage(Shader::Stage::Fragment);
infos.fill(nullptr);
TryBindStage(Stage::Fragment, LogicalStage::Fragment);
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
switch (regs.stage_enable.raw) {
@ -348,22 +391,36 @@ bool PipelineCache::RefreshGraphicsKey() {
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
return false;
}
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
return false;
}
if (!TryBindStage(Shader::Stage::Geometry)) {
if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
return false;
}
break;
}
case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
if (!instance.IsTessellationSupported()) {
break;
}
if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
return false;
}
if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
return false;
}
if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
return false;
}
break;
}
default: {
TryBindStage(Shader::Stage::Vertex);
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
TryBindStage(Stage::Vertex, LogicalStage::Vertex);
break;
}
}
const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
u32 vertex_binding = 0;
for (const auto& attrib : fetch_shader->attributes) {
@ -385,10 +442,18 @@ bool PipelineCache::RefreshGraphicsKey() {
// Second pass to fill the remaining CB pipeline key data
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) ||
(key.mrt_mask & (1u << cb)) == 0) {
key.color_formats[cb] = vk::Format::eUndefined;
key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard;
if (skip_cb_binding || !col_buf) {
// No attachment bound and no incremented index.
continue;
}
if (!regs.color_target_mask.GetMask(cb) || (key.mrt_mask & (1u << cb)) == 0) {
// Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
// of the latter we need to change format to undefined, and either way we need to
// increment the index for the null attachment binding.
key.color_formats[remapped_cb] = vk::Format::eUndefined;
key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard;
++remapped_cb;
continue;
}
@ -397,10 +462,9 @@ bool PipelineCache::RefreshGraphicsKey() {
!col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
++remapped_cb;
num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2);
++remapped_cb;
}
// It seems that the number of samples > 1 set in the AA config doesn't mean we're always
@ -409,19 +473,18 @@ bool PipelineCache::RefreshGraphicsKey() {
key.num_samples = num_samples;
return true;
}
} // namespace Vulkan
bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{};
const auto* cs_pgm = &liverpool->regs.cs_program;
const auto cs_params = Liverpool::GetParams(*cs_pgm);
const auto& cs_pgm = liverpool->GetCsRegs();
const auto cs_params = Liverpool::GetParams(cs_pgm);
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
GetProgram(Shader::Stage::Compute, cs_params, binding);
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
return true;
}
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
const Shader::RuntimeInfo& runtime_info,
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
Shader::Backend::Bindings& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
@ -446,19 +509,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
if (Config::collectShadersForDebug()) {
DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span<const u32>{},
is_patched);
DebugState.CollectShader(name, info.l_stage, module, spv, code,
patch ? *patch : std::span<const u32>{}, is_patched);
}
return module;
}
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage);
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
auto runtime_info = BuildRuntimeInfo(stage, l_stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
if (new_program) {
it_pgm.value() = std::make_unique<Program>(stage, params);
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
auto& program = it_pgm.value();
auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
@ -467,6 +530,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
HashCombine(params.hash, 0));
}
it_pgm.value()->info.user_data = params.user_data;
auto& program = it_pgm.value();
auto& info = program->info;
@ -477,7 +541,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
if (it == program->modules.end()) {
auto new_info = Shader::Info(stage, params);
auto new_info = Shader::Info(stage, l_stage, params);
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
program->AddPermut(module, std::move(spec));
} else {

Some files were not shown because too many files have changed in this diff Show More