mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 00:42:48 +00:00
Merge branch 'main' into compat-gui
This commit is contained in:
commit
c5910d3a30
26
.github/workflows/build.yml
vendored
26
.github/workflows/build.yml
vendored
@ -89,7 +89,7 @@ jobs:
|
|||||||
arch: amd64
|
arch: amd64
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
|
||||||
@ -143,7 +143,7 @@ jobs:
|
|||||||
arch: amd64
|
arch: amd64
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
|
||||||
@ -174,11 +174,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
xcode-version: latest
|
xcode-version: latest
|
||||||
|
|
||||||
- name: Install MoltenVK
|
|
||||||
run: |
|
|
||||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
|
||||||
arch -x86_64 /usr/local/bin/brew install molten-vk
|
|
||||||
|
|
||||||
- name: Cache CMake Configuration
|
- name: Cache CMake Configuration
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
env:
|
env:
|
||||||
@ -201,7 +196,7 @@ jobs:
|
|||||||
variant: sccache
|
variant: sccache
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
|
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
|
||||||
@ -210,7 +205,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
mkdir upload
|
mkdir upload
|
||||||
mv ${{github.workspace}}/build/shadps4 upload
|
mv ${{github.workspace}}/build/shadps4 upload
|
||||||
cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload
|
cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload
|
||||||
tar cf shadps4-macos-sdl.tar.gz -C upload .
|
tar cf shadps4-macos-sdl.tar.gz -C upload .
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
@ -230,11 +225,8 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
xcode-version: latest
|
xcode-version: latest
|
||||||
|
|
||||||
- name: Install MoltenVK and Setup Qt
|
- name: Setup Qt
|
||||||
run: |
|
uses: jurplel/install-qt-action@v4
|
||||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
|
||||||
arch -x86_64 /usr/local/bin/brew install molten-vk
|
|
||||||
- uses: jurplel/install-qt-action@v4
|
|
||||||
with:
|
with:
|
||||||
version: 6.7.3
|
version: 6.7.3
|
||||||
host: mac
|
host: mac
|
||||||
@ -265,7 +257,7 @@ jobs:
|
|||||||
variant: sccache
|
variant: sccache
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
|
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
|
||||||
@ -312,7 +304,7 @@ jobs:
|
|||||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
||||||
@ -368,7 +360,7 @@ jobs:
|
|||||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
|
||||||
|
12
.gitmodules
vendored
12
.gitmodules
vendored
@ -107,3 +107,15 @@
|
|||||||
path = externals/libpng
|
path = externals/libpng
|
||||||
url = https://github.com/pnggroup/libpng
|
url = https://github.com/pnggroup/libpng
|
||||||
shallow = true
|
shallow = true
|
||||||
|
[submodule "externals/MoltenVK/SPIRV-Cross"]
|
||||||
|
path = externals/MoltenVK/SPIRV-Cross
|
||||||
|
url = https://github.com/KhronosGroup/SPIRV-Cross
|
||||||
|
shallow = true
|
||||||
|
[submodule "externals/MoltenVK/MoltenVK"]
|
||||||
|
path = externals/MoltenVK/MoltenVK
|
||||||
|
url = https://github.com/KhronosGroup/MoltenVK
|
||||||
|
shallow = true
|
||||||
|
[submodule "externals/MoltenVK/cereal"]
|
||||||
|
path = externals/MoltenVK/cereal
|
||||||
|
url = https://github.com/USCiLab/cereal
|
||||||
|
shallow = true
|
||||||
|
@ -664,12 +664,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||||||
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
|
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
|
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
|
||||||
|
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
|
||||||
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/ir_passes.h
|
src/shader_recompiler/ir/passes/ir_passes.h
|
||||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
||||||
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
|
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
|
||||||
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
||||||
|
src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
|
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
|
||||||
src/shader_recompiler/ir/abstract_syntax_list.h
|
src/shader_recompiler/ir/abstract_syntax_list.h
|
||||||
src/shader_recompiler/ir/attribute.cpp
|
src/shader_recompiler/ir/attribute.cpp
|
||||||
@ -683,6 +685,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||||||
src/shader_recompiler/ir/opcodes.cpp
|
src/shader_recompiler/ir/opcodes.cpp
|
||||||
src/shader_recompiler/ir/opcodes.h
|
src/shader_recompiler/ir/opcodes.h
|
||||||
src/shader_recompiler/ir/opcodes.inc
|
src/shader_recompiler/ir/opcodes.inc
|
||||||
|
src/shader_recompiler/ir/patch.cpp
|
||||||
|
src/shader_recompiler/ir/patch.h
|
||||||
src/shader_recompiler/ir/post_order.cpp
|
src/shader_recompiler/ir/post_order.cpp
|
||||||
src/shader_recompiler/ir/post_order.h
|
src/shader_recompiler/ir/post_order.h
|
||||||
src/shader_recompiler/ir/program.cpp
|
src/shader_recompiler/ir/program.cpp
|
||||||
@ -877,7 +881,7 @@ endif()
|
|||||||
create_target_directory_groups(shadps4)
|
create_target_directory_groups(shadps4)
|
||||||
|
|
||||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
|
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
|
||||||
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
|
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
|
||||||
|
|
||||||
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
|
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
|
||||||
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
|
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
|
||||||
@ -891,13 +895,17 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (APPLE)
|
if (APPLE)
|
||||||
option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF)
|
if (ENABLE_QT_GUI)
|
||||||
if (USE_SYSTEM_VULKAN_LOADER)
|
# Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers.
|
||||||
target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1)
|
target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json)
|
||||||
|
set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json
|
||||||
|
PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d)
|
||||||
|
add_custom_command(TARGET shadps4 POST_BUILD
|
||||||
|
COMMAND cmake -E copy $<TARGET_LINKER_FILE:MoltenVK> $<TARGET_BUNDLE_DIR:shadps4>/Contents/Frameworks/libMoltenVK.dylib)
|
||||||
|
set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks")
|
||||||
else()
|
else()
|
||||||
# Link MoltenVK for Vulkan support
|
# For non-bundled SDL build, just do a normal library link.
|
||||||
find_library(MOLTENVK MoltenVK REQUIRED)
|
target_link_libraries(shadps4 PRIVATE MoltenVK)
|
||||||
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ARCHITECTURE STREQUAL "x86_64")
|
if (ARCHITECTURE STREQUAL "x86_64")
|
||||||
|
@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o
|
|||||||
|
|
||||||
# Keyboard mapping
|
# Keyboard mapping
|
||||||
|
|
||||||
|
| Button | Function |
|
||||||
|
|-------------|-------------|
|
||||||
|
F10 | FPS Counter
|
||||||
|
Ctrl+F10 | Video Debug Info
|
||||||
|
F11 | Fullscreen
|
||||||
|
F12 | Trigger RenderDoc Capture
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> Xbox and DualShock controllers work out of the box.
|
> Xbox and DualShock controllers work out of the box.
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ path = [
|
|||||||
"documents/changelog.md",
|
"documents/changelog.md",
|
||||||
"documents/Quickstart/2.png",
|
"documents/Quickstart/2.png",
|
||||||
"documents/Screenshots/*",
|
"documents/Screenshots/*",
|
||||||
|
"externals/MoltenVK/MoltenVK_icd.json",
|
||||||
"scripts/ps4_names.txt",
|
"scripts/ps4_names.txt",
|
||||||
"src/images/about_icon.png",
|
"src/images/about_icon.png",
|
||||||
"src/images/controller_icon.png",
|
"src/images/controller_icon.png",
|
||||||
|
@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv)
|
|||||||
brew install clang-format cmake
|
brew install clang-format cmake
|
||||||
```
|
```
|
||||||
|
|
||||||
Next, install x86_64 Homebrew and libraries.
|
Next, install x86_64 Qt. You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI.
|
||||||
|
|
||||||
**If you are on an ARM Mac:**
|
**If you are on an ARM Mac:**
|
||||||
```
|
```
|
||||||
# Installs x86_64 Homebrew to /usr/local
|
# Installs x86_64 Homebrew to /usr/local
|
||||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||||
# Installs libraries.
|
# Installs libraries.
|
||||||
arch -x86_64 /usr/local/bin/brew install molten-vk qt@6
|
arch -x86_64 /usr/local/bin/brew install qt@6
|
||||||
```
|
```
|
||||||
|
|
||||||
**If you are on an x86_64 Mac:**
|
**If you are on an x86_64 Mac:**
|
||||||
```
|
```
|
||||||
brew install molten-vk qt@6
|
brew install qt@6
|
||||||
```
|
```
|
||||||
|
|
||||||
If you don't need the Qt GUI you can remove `qt@6` from the last command.
|
|
||||||
|
|
||||||
### Cloning and compiling:
|
### Cloning and compiling:
|
||||||
|
|
||||||
Clone the repository recursively:
|
Clone the repository recursively:
|
||||||
|
31
externals/CMakeLists.txt
vendored
31
externals/CMakeLists.txt
vendored
@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES
|
|||||||
SYSTEM ON
|
SYSTEM ON
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
|
||||||
|
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
# Silence "deprecation" warnings
|
# Silence "deprecation" warnings
|
||||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
|
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
|
||||||
@ -107,7 +110,7 @@ if (NOT TARGET glslang::glslang)
|
|||||||
set(ENABLE_OPT OFF CACHE BOOL "")
|
set(ENABLE_OPT OFF CACHE BOOL "")
|
||||||
add_subdirectory(glslang)
|
add_subdirectory(glslang)
|
||||||
file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h")
|
file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h")
|
||||||
target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
|
target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Robin-map
|
# Robin-map
|
||||||
@ -174,15 +177,6 @@ if (NOT TARGET PNG::PNG)
|
|||||||
add_library(PNG::PNG ALIAS png_static)
|
add_library(PNG::PNG ALIAS png_static)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (APPLE)
|
|
||||||
# date
|
|
||||||
if (NOT TARGET date::date-tz)
|
|
||||||
option(BUILD_TZ_LIB "" ON)
|
|
||||||
option(USE_SYSTEM_TZ_DB "" ON)
|
|
||||||
add_subdirectory(date)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Dear ImGui
|
# Dear ImGui
|
||||||
add_library(Dear_ImGui
|
add_library(Dear_ImGui
|
||||||
dear_imgui/imgui.cpp
|
dear_imgui/imgui.cpp
|
||||||
@ -199,7 +193,7 @@ option(TRACY_ENABLE "" ON)
|
|||||||
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash
|
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash
|
||||||
option(TRACY_ON_DEMAND "" ON)
|
option(TRACY_ON_DEMAND "" ON)
|
||||||
option(TRACY_NO_FRAME_IMAGE "" ON)
|
option(TRACY_NO_FRAME_IMAGE "" ON)
|
||||||
option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling
|
option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability
|
||||||
option(TRACY_NO_SYSTEM_TRACING "" ON)
|
option(TRACY_NO_SYSTEM_TRACING "" ON)
|
||||||
option(TRACY_NO_CALLSTACK "" ON)
|
option(TRACY_NO_CALLSTACK "" ON)
|
||||||
option(TRACY_NO_CODE_TRANSFER "" ON)
|
option(TRACY_NO_CODE_TRANSFER "" ON)
|
||||||
@ -229,3 +223,18 @@ if (NOT TARGET stb::headers)
|
|||||||
target_include_directories(stb INTERFACE stb)
|
target_include_directories(stb INTERFACE stb)
|
||||||
add_library(stb::headers ALIAS stb)
|
add_library(stb::headers ALIAS stb)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Apple-only dependencies
|
||||||
|
if (APPLE)
|
||||||
|
# date
|
||||||
|
if (NOT TARGET date::date-tz)
|
||||||
|
option(BUILD_TZ_LIB "" ON)
|
||||||
|
option(USE_SYSTEM_TZ_DB "" ON)
|
||||||
|
add_subdirectory(date)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# MoltenVK
|
||||||
|
if (NOT TARGET MoltenVK)
|
||||||
|
add_subdirectory(MoltenVK)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
93
externals/MoltenVK/CMakeLists.txt
vendored
Normal file
93
externals/MoltenVK/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
# Prepare MoltenVK Git revision
|
||||||
|
find_package(Git)
|
||||||
|
if(GIT_FOUND)
|
||||||
|
execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
||||||
|
OUTPUT_VARIABLE MVK_GIT_REV
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK
|
||||||
|
ERROR_QUIET
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
|
endif()
|
||||||
|
set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated)
|
||||||
|
file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";")
|
||||||
|
message(STATUS "MoltenVK revision: ${MVK_GIT_REV}")
|
||||||
|
|
||||||
|
# Prepare MoltenVK version
|
||||||
|
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API)
|
||||||
|
string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}")
|
||||||
|
string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}")
|
||||||
|
string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}")
|
||||||
|
string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}")
|
||||||
|
string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}")
|
||||||
|
string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}")
|
||||||
|
set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}")
|
||||||
|
message(STATUS "MoltenVK version: ${MVK_VERSION}")
|
||||||
|
|
||||||
|
# Find required system libraries
|
||||||
|
find_library(APPKIT_LIBRARY AppKit REQUIRED)
|
||||||
|
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||||
|
find_library(IOKIT_LIBRARY IOKit REQUIRED)
|
||||||
|
find_library(IOSURFACE_LIBRARY IOSurface REQUIRED)
|
||||||
|
find_library(METAL_LIBRARY Metal REQUIRED)
|
||||||
|
find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED)
|
||||||
|
|
||||||
|
# cereal
|
||||||
|
option(SKIP_PORTABILITY_TEST "" ON)
|
||||||
|
option(BUILD_DOC "" OFF)
|
||||||
|
option(BUILD_SANDBOX "" OFF)
|
||||||
|
option(SKIP_PERFORMANCE_COMPARISON "" ON)
|
||||||
|
option(SPIRV_CROSS_SKIP_INSTALL "" ON)
|
||||||
|
add_subdirectory(cereal)
|
||||||
|
|
||||||
|
# SPIRV-Cross
|
||||||
|
option(SPIRV_CROSS_CLI "" OFF)
|
||||||
|
option(SPIRV_CROSS_ENABLE_TESTS "" OFF)
|
||||||
|
option(SPIRV_CROSS_ENABLE_HLSL "" OFF)
|
||||||
|
option(SPIRV_CROSS_ENABLE_CPP "" OFF)
|
||||||
|
option(SPIRV_CROSS_SKIP_INSTALL "" ON)
|
||||||
|
add_subdirectory(SPIRV-Cross)
|
||||||
|
|
||||||
|
# Common
|
||||||
|
set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common)
|
||||||
|
file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS
|
||||||
|
${MVK_COMMON_DIR}/*.cpp
|
||||||
|
${MVK_COMMON_DIR}/*.m
|
||||||
|
${MVK_COMMON_DIR}/*.mm)
|
||||||
|
set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR})
|
||||||
|
|
||||||
|
add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES})
|
||||||
|
target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES})
|
||||||
|
target_compile_options(MoltenVKCommon PRIVATE -w)
|
||||||
|
|
||||||
|
# MoltenVKShaderConverter
|
||||||
|
set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter)
|
||||||
|
file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS
|
||||||
|
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp
|
||||||
|
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m
|
||||||
|
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm)
|
||||||
|
set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include)
|
||||||
|
|
||||||
|
add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES})
|
||||||
|
target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES})
|
||||||
|
target_compile_options(MoltenVKShaderConverter PRIVATE -w)
|
||||||
|
target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon)
|
||||||
|
target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1)
|
||||||
|
|
||||||
|
# MoltenVK
|
||||||
|
set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK)
|
||||||
|
file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS
|
||||||
|
${MVK_DIR}/MoltenVK/*.cpp
|
||||||
|
${MVK_DIR}/MoltenVK/*.m
|
||||||
|
${MVK_DIR}/MoltenVK/*.mm)
|
||||||
|
file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*)
|
||||||
|
set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include)
|
||||||
|
|
||||||
|
add_library(MoltenVK SHARED ${MVK_SOURCES})
|
||||||
|
target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES})
|
||||||
|
target_compile_options(MoltenVK PRIVATE -w)
|
||||||
|
target_link_libraries(MoltenVK PRIVATE
|
||||||
|
${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY}
|
||||||
|
Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter)
|
||||||
|
target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1)
|
1
externals/MoltenVK/MoltenVK
vendored
Submodule
1
externals/MoltenVK/MoltenVK
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932
|
8
externals/MoltenVK/MoltenVK_icd.json
vendored
Normal file
8
externals/MoltenVK/MoltenVK_icd.json
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"file_format_version": "1.0.0",
|
||||||
|
"ICD": {
|
||||||
|
"library_path": "../../../Frameworks/libMoltenVK.dylib",
|
||||||
|
"api_version": "1.2.0",
|
||||||
|
"is_portability_driver": true
|
||||||
|
}
|
||||||
|
}
|
1
externals/MoltenVK/SPIRV-Cross
vendored
Submodule
1
externals/MoltenVK/SPIRV-Cross
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6
|
1
externals/MoltenVK/cereal
vendored
Submodule
1
externals/MoltenVK/cereal
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit d1fcec807b372f04e4c1041b3058e11c12853e6e
|
2
externals/sirit
vendored
2
externals/sirit
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d
|
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
|
@ -422,6 +422,10 @@ void setEmulatorLanguage(std::string language) {
|
|||||||
emulator_language = language;
|
emulator_language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setGameInstallDirs(const std::vector<std::filesystem::path>& settings_install_dirs_config) {
|
||||||
|
settings_install_dirs = settings_install_dirs_config;
|
||||||
|
}
|
||||||
|
|
||||||
u32 getMainWindowGeometryX() {
|
u32 getMainWindowGeometryX() {
|
||||||
return main_window_geometry_x;
|
return main_window_geometry_x;
|
||||||
}
|
}
|
||||||
@ -673,14 +677,6 @@ void save(const std::filesystem::path& path) {
|
|||||||
data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic;
|
data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic;
|
||||||
data["Debug"]["DebugDump"] = isDebugDump;
|
data["Debug"]["DebugDump"] = isDebugDump;
|
||||||
data["Debug"]["CollectShader"] = isShaderDebug;
|
data["Debug"]["CollectShader"] = isShaderDebug;
|
||||||
data["GUI"]["theme"] = mw_themes;
|
|
||||||
data["GUI"]["iconSize"] = m_icon_size;
|
|
||||||
data["GUI"]["sliderPos"] = m_slider_pos;
|
|
||||||
data["GUI"]["iconSizeGrid"] = m_icon_size_grid;
|
|
||||||
data["GUI"]["sliderPosGrid"] = m_slider_pos_grid;
|
|
||||||
data["GUI"]["gameTableMode"] = m_table_mode;
|
|
||||||
data["GUI"]["mw_width"] = m_window_size_W;
|
|
||||||
data["GUI"]["mw_height"] = m_window_size_H;
|
|
||||||
|
|
||||||
std::vector<std::string> install_dirs;
|
std::vector<std::string> install_dirs;
|
||||||
for (const auto& dirString : settings_install_dirs) {
|
for (const auto& dirString : settings_install_dirs) {
|
||||||
@ -690,6 +686,44 @@ void save(const std::filesystem::path& path) {
|
|||||||
|
|
||||||
data["GUI"]["addonInstallDir"] =
|
data["GUI"]["addonInstallDir"] =
|
||||||
std::string{fmt::UTF(settings_addon_install_dir.u8string()).data};
|
std::string{fmt::UTF(settings_addon_install_dir.u8string()).data};
|
||||||
|
data["GUI"]["emulatorLanguage"] = emulator_language;
|
||||||
|
data["Settings"]["consoleLanguage"] = m_language;
|
||||||
|
|
||||||
|
std::ofstream file(path, std::ios::binary);
|
||||||
|
file << data;
|
||||||
|
file.close();
|
||||||
|
saveMainWindow(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
void saveMainWindow(const std::filesystem::path& path) {
|
||||||
|
toml::value data;
|
||||||
|
|
||||||
|
std::error_code error;
|
||||||
|
if (std::filesystem::exists(path, error)) {
|
||||||
|
try {
|
||||||
|
std::ifstream ifs;
|
||||||
|
ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
||||||
|
ifs.open(path, std::ios_base::binary);
|
||||||
|
data = toml::parse(ifs, std::string{fmt::UTF(path.filename().u8string()).data});
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
fmt::print("Exception trying to parse config file. Exception: {}\n", ex.what());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (error) {
|
||||||
|
fmt::print("Filesystem error: {}\n", error.message());
|
||||||
|
}
|
||||||
|
fmt::print("Saving new configuration file {}\n", fmt::UTF(path.u8string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
data["GUI"]["mw_width"] = m_window_size_W;
|
||||||
|
data["GUI"]["mw_height"] = m_window_size_H;
|
||||||
|
data["GUI"]["theme"] = mw_themes;
|
||||||
|
data["GUI"]["iconSize"] = m_icon_size;
|
||||||
|
data["GUI"]["sliderPos"] = m_slider_pos;
|
||||||
|
data["GUI"]["iconSizeGrid"] = m_icon_size_grid;
|
||||||
|
data["GUI"]["sliderPosGrid"] = m_slider_pos_grid;
|
||||||
|
data["GUI"]["gameTableMode"] = m_table_mode;
|
||||||
data["GUI"]["geometry_x"] = main_window_geometry_x;
|
data["GUI"]["geometry_x"] = main_window_geometry_x;
|
||||||
data["GUI"]["geometry_y"] = main_window_geometry_y;
|
data["GUI"]["geometry_y"] = main_window_geometry_y;
|
||||||
data["GUI"]["geometry_w"] = main_window_geometry_w;
|
data["GUI"]["geometry_w"] = main_window_geometry_w;
|
||||||
@ -697,9 +731,6 @@ void save(const std::filesystem::path& path) {
|
|||||||
data["GUI"]["pkgDirs"] = m_pkg_viewer;
|
data["GUI"]["pkgDirs"] = m_pkg_viewer;
|
||||||
data["GUI"]["elfDirs"] = m_elf_viewer;
|
data["GUI"]["elfDirs"] = m_elf_viewer;
|
||||||
data["GUI"]["recentFiles"] = m_recent_files;
|
data["GUI"]["recentFiles"] = m_recent_files;
|
||||||
data["GUI"]["emulatorLanguage"] = emulator_language;
|
|
||||||
|
|
||||||
data["Settings"]["consoleLanguage"] = m_language;
|
|
||||||
|
|
||||||
std::ofstream file(path, std::ios::binary);
|
std::ofstream file(path, std::ios::binary);
|
||||||
file << data;
|
file << data;
|
||||||
|
@ -13,6 +13,7 @@ enum HideCursorState : s16 { Never, Idle, Always };
|
|||||||
|
|
||||||
void load(const std::filesystem::path& path);
|
void load(const std::filesystem::path& path);
|
||||||
void save(const std::filesystem::path& path);
|
void save(const std::filesystem::path& path);
|
||||||
|
void saveMainWindow(const std::filesystem::path& path);
|
||||||
|
|
||||||
bool isNeoMode();
|
bool isNeoMode();
|
||||||
bool isFullscreenMode();
|
bool isFullscreenMode();
|
||||||
@ -67,6 +68,7 @@ void setNeoMode(bool enable);
|
|||||||
void setUserName(const std::string& type);
|
void setUserName(const std::string& type);
|
||||||
void setUpdateChannel(const std::string& type);
|
void setUpdateChannel(const std::string& type);
|
||||||
void setSeparateUpdateEnabled(bool use);
|
void setSeparateUpdateEnabled(bool use);
|
||||||
|
void setGameInstallDirs(const std::vector<std::filesystem::path>& settings_install_dirs_config);
|
||||||
|
|
||||||
void setCursorState(s16 cursorState);
|
void setCursorState(s16 cursorState);
|
||||||
void setCursorHideTimeout(int newcursorHideTimeout);
|
void setCursorHideTimeout(int newcursorHideTimeout);
|
||||||
|
@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() {
|
|||||||
return tracy::GetProfiler().IsConnected();
|
return tracy::GetProfiler().IsConnected();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define TRACY_GPU_ENABLED 0
|
||||||
|
|
||||||
#define CUSTOM_LOCK(type, varname) \
|
#define CUSTOM_LOCK(type, varname) \
|
||||||
tracy::LockableCtx varname { \
|
tracy::LockableCtx varname { \
|
||||||
[]() -> const tracy::SourceLocationData* { \
|
[]() -> const tracy::SourceLocationData* { \
|
||||||
@ -57,3 +59,11 @@ enum MarkersPalette : int {
|
|||||||
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
|
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
|
||||||
|
|
||||||
#define FRAME_END FrameMark
|
#define FRAME_END FrameMark
|
||||||
|
|
||||||
|
#ifdef TRACY_FIBERS
|
||||||
|
#define FIBER_ENTER(name) TracyFiberEnter(name)
|
||||||
|
#define FIBER_EXIT TracyFiberLeave
|
||||||
|
#else
|
||||||
|
#define FIBER_ENTER(name)
|
||||||
|
#define FIBER_EXIT
|
||||||
|
#endif
|
||||||
|
@ -142,32 +142,34 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
|
|||||||
frame.queues.push_back(std::move(dump));
|
frame.queues.push_back(std::move(dump));
|
||||||
}
|
}
|
||||||
|
|
||||||
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
|
||||||
const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
|
|
||||||
std::scoped_lock lock{frame_dump_list_mutex};
|
|
||||||
const auto it = waiting_reg_dumps.find(header_addr);
|
const auto it = waiting_reg_dumps.find(header_addr);
|
||||||
if (it == waiting_reg_dumps.end()) {
|
if (it == waiting_reg_dumps.end()) {
|
||||||
return;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
auto& frame = *it->second;
|
auto& frame = *it->second;
|
||||||
waiting_reg_dumps.erase(it);
|
waiting_reg_dumps.erase(it);
|
||||||
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
|
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
|
||||||
auto& dump = frame.regs[header_addr - base_addr];
|
return &frame.regs[header_addr - base_addr];
|
||||||
dump.regs = regs;
|
}
|
||||||
if (is_compute) {
|
|
||||||
dump.is_compute = true;
|
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||||
const auto& cs = dump.regs.cs_program;
|
const AmdGpu::Liverpool::Regs& regs) {
|
||||||
dump.cs_data = PipelineComputerProgramDump{
|
std::scoped_lock lock{frame_dump_list_mutex};
|
||||||
.cs_program = cs,
|
|
||||||
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
auto dump = GetRegDump(base_addr, header_addr);
|
||||||
};
|
if (!dump) {
|
||||||
} else {
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*dump)->regs = regs;
|
||||||
|
|
||||||
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
||||||
if (regs.stage_enable.IsStageEnabled(i)) {
|
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
|
||||||
auto stage = regs.ProgramForStage(i);
|
auto stage = (*dump)->regs.ProgramForStage(i);
|
||||||
if (stage->address_lo != 0) {
|
if (stage->address_lo != 0) {
|
||||||
auto code = stage->Code();
|
auto code = stage->Code();
|
||||||
dump.stages[i] = PipelineShaderProgramDump{
|
(*dump)->stages[i] = PipelineShaderProgramDump{
|
||||||
.user_data = *stage,
|
.user_data = *stage,
|
||||||
.code = std::vector<u32>{code.begin(), code.end()},
|
.code = std::vector<u32>{code.begin(), code.end()},
|
||||||
};
|
};
|
||||||
@ -175,12 +177,31 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
|
||||||
|
const CsState& cs_state) {
|
||||||
|
std::scoped_lock lock{frame_dump_list_mutex};
|
||||||
|
|
||||||
|
auto dump = GetRegDump(base_addr, header_addr);
|
||||||
|
if (!dump) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module,
|
(*dump)->is_compute = true;
|
||||||
std::span<const u32> spv, std::span<const u32> raw_code,
|
auto& cs = (*dump)->regs.cs_program;
|
||||||
std::span<const u32> patch_spv, bool is_patched) {
|
cs = cs_state;
|
||||||
shader_dump_list.emplace_back(name, module, std::vector<u32>{spv.begin(), spv.end()},
|
|
||||||
|
(*dump)->cs_data = PipelineComputerProgramDump{
|
||||||
|
.cs_program = cs,
|
||||||
|
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||||
|
vk::ShaderModule module, std::span<const u32> spv,
|
||||||
|
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||||
|
bool is_patched) {
|
||||||
|
shader_dump_list.emplace_back(name, l_stage, module, std::vector<u32>{spv.begin(), spv.end()},
|
||||||
std::vector<u32>{raw_code.begin(), raw_code.end()},
|
std::vector<u32>{raw_code.begin(), raw_code.end()},
|
||||||
std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
|
std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@ -76,6 +75,7 @@ struct FrameDump {
|
|||||||
|
|
||||||
struct ShaderDump {
|
struct ShaderDump {
|
||||||
std::string name;
|
std::string name;
|
||||||
|
Shader::LogicalStage l_stage;
|
||||||
vk::ShaderModule module;
|
vk::ShaderModule module;
|
||||||
|
|
||||||
std::vector<u32> spv;
|
std::vector<u32> spv;
|
||||||
@ -90,16 +90,17 @@ struct ShaderDump {
|
|||||||
std::string cache_isa_disasm{};
|
std::string cache_isa_disasm{};
|
||||||
std::string cache_patch_disasm{};
|
std::string cache_patch_disasm{};
|
||||||
|
|
||||||
ShaderDump(std::string name, vk::ShaderModule module, std::vector<u32> spv,
|
ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module,
|
||||||
std::vector<u32> isa, std::vector<u32> patch_spv, bool is_patched)
|
std::vector<u32> spv, std::vector<u32> isa, std::vector<u32> patch_spv,
|
||||||
: name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)),
|
bool is_patched)
|
||||||
patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
|
: name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)),
|
||||||
|
isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
|
||||||
|
|
||||||
ShaderDump(const ShaderDump& other) = delete;
|
ShaderDump(const ShaderDump& other) = delete;
|
||||||
ShaderDump(ShaderDump&& other) noexcept
|
ShaderDump(ShaderDump&& other) noexcept
|
||||||
: name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)},
|
: name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)},
|
||||||
isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)},
|
spv{std::move(other.spv)}, isa{std::move(other.isa)},
|
||||||
patch_source{std::move(other.patch_source)},
|
patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)},
|
||||||
cache_spv_disasm{std::move(other.cache_spv_disasm)},
|
cache_spv_disasm{std::move(other.cache_spv_disasm)},
|
||||||
cache_isa_disasm{std::move(other.cache_isa_disasm)},
|
cache_isa_disasm{std::move(other.cache_isa_disasm)},
|
||||||
cache_patch_disasm{std::move(other.cache_patch_disasm)} {}
|
cache_patch_disasm{std::move(other.cache_patch_disasm)} {}
|
||||||
@ -108,6 +109,7 @@ struct ShaderDump {
|
|||||||
if (this == &other)
|
if (this == &other)
|
||||||
return *this;
|
return *this;
|
||||||
name = std::move(other.name);
|
name = std::move(other.name);
|
||||||
|
l_stage = other.l_stage;
|
||||||
module = std::move(other.module);
|
module = std::move(other.module);
|
||||||
spv = std::move(other.spv);
|
spv = std::move(other.spv);
|
||||||
isa = std::move(other.isa);
|
isa = std::move(other.isa);
|
||||||
@ -201,11 +203,17 @@ public:
|
|||||||
void PushQueueDump(QueueDump dump);
|
void PushQueueDump(QueueDump dump);
|
||||||
|
|
||||||
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||||
const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
|
const AmdGpu::Liverpool::Regs& regs);
|
||||||
|
using CsState = AmdGpu::Liverpool::ComputeProgram;
|
||||||
|
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
|
||||||
|
|
||||||
void CollectShader(const std::string& name, vk::ShaderModule module, std::span<const u32> spv,
|
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||||
|
vk::ShaderModule module, std::span<const u32> spv,
|
||||||
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||||
bool is_patched);
|
bool is_patched);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
|
||||||
};
|
};
|
||||||
} // namespace DebugStateType
|
} // namespace DebugStateType
|
||||||
|
|
||||||
|
@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) {
|
|||||||
DebugState.ShowDebugMessage(msg);
|
DebugState.ShowDebugMessage(msg);
|
||||||
}
|
}
|
||||||
if (compile) {
|
if (compile) {
|
||||||
static std::map<std::string, std::string> stage_arg = {
|
static std::map<Shader::LogicalStage, std::string> stage_arg = {
|
||||||
{"vs", "vert"},
|
{Shader::LogicalStage::Vertex, "vert"},
|
||||||
{"gs", "geom"},
|
{Shader::LogicalStage::TessellationControl, "tesc"},
|
||||||
{"fs", "frag"},
|
{Shader::LogicalStage::TessellationEval, "tese"},
|
||||||
{"cs", "comp"},
|
{Shader::LogicalStage::Geometry, "geom"},
|
||||||
|
{Shader::LogicalStage::Fragment, "frag"},
|
||||||
|
{Shader::LogicalStage::Compute, "comp"},
|
||||||
};
|
};
|
||||||
auto stage = stage_arg.find(value.name.substr(0, 2));
|
auto stage = stage_arg.find(value.l_stage);
|
||||||
if (stage == stage_arg.end()) {
|
if (stage == stage_arg.end()) {
|
||||||
DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} +
|
DebugState.ShowDebugMessage(std::string{"Invalid shader stage"});
|
||||||
value.name.substr(0, 2));
|
|
||||||
} else {
|
} else {
|
||||||
std::string cmd =
|
std::string cmd =
|
||||||
fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 "
|
fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 "
|
||||||
|
@ -10,16 +10,28 @@
|
|||||||
|
|
||||||
namespace Core::FileSys {
|
namespace Core::FileSys {
|
||||||
|
|
||||||
|
std::string RemoveTrailingSlashes(const std::string& path) {
|
||||||
|
// Remove trailing slashes to make comparisons simpler.
|
||||||
|
std::string path_sanitized = path;
|
||||||
|
while (path_sanitized.ends_with("/")) {
|
||||||
|
path_sanitized.pop_back();
|
||||||
|
}
|
||||||
|
return path_sanitized;
|
||||||
|
}
|
||||||
|
|
||||||
void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder,
|
void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder,
|
||||||
bool read_only) {
|
bool read_only) {
|
||||||
std::scoped_lock lock{m_mutex};
|
std::scoped_lock lock{m_mutex};
|
||||||
m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only);
|
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
|
||||||
|
m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) {
|
void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) {
|
||||||
std::scoped_lock lock{m_mutex};
|
std::scoped_lock lock{m_mutex};
|
||||||
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(),
|
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
|
||||||
[&](const MntPair& pair) { return pair.mount == guest_folder; });
|
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) {
|
||||||
|
return pair.mount == guest_folder_sanitized;
|
||||||
|
});
|
||||||
m_mnt_pairs.erase(it, m_mnt_pairs.end());
|
m_mnt_pairs.erase(it, m_mnt_pairs.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Nothing to do if getting the mount itself.
|
// Nothing to do if getting the mount itself.
|
||||||
if (corrected_path == mount->mount) {
|
const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path);
|
||||||
|
if (corrected_path_sanitized == mount->mount) {
|
||||||
return mount->host_path;
|
return mount->host_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -186,4 +199,14 @@ void HandleTable::CreateStdHandles() {
|
|||||||
setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr
|
setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int HandleTable::GetFileDescriptor(File* file) {
|
||||||
|
std::scoped_lock lock{m_mutex};
|
||||||
|
auto it = std::find(m_files.begin(), m_files.end(), file);
|
||||||
|
|
||||||
|
if (it != m_files.end()) {
|
||||||
|
return std::distance(m_files.begin(), it);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Core::FileSys
|
} // namespace Core::FileSys
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
|
#include "common/logging/formatter.h"
|
||||||
#include "core/devices/base_device.h"
|
#include "core/devices/base_device.h"
|
||||||
|
|
||||||
namespace Core::FileSys {
|
namespace Core::FileSys {
|
||||||
@ -22,7 +23,7 @@ class MntPoints {
|
|||||||
public:
|
public:
|
||||||
struct MntPair {
|
struct MntPair {
|
||||||
std::filesystem::path host_path;
|
std::filesystem::path host_path;
|
||||||
std::string mount; // e.g /app0/
|
std::string mount; // e.g /app0
|
||||||
bool read_only;
|
bool read_only;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -37,10 +38,21 @@ public:
|
|||||||
std::filesystem::path GetHostPath(std::string_view guest_directory,
|
std::filesystem::path GetHostPath(std::string_view guest_directory,
|
||||||
bool* is_read_only = nullptr);
|
bool* is_read_only = nullptr);
|
||||||
|
|
||||||
|
const MntPair* GetMountFromHostPath(const std::string& host_path) {
|
||||||
|
std::scoped_lock lock{m_mutex};
|
||||||
|
const auto it = std::ranges::find_if(m_mnt_pairs, [&](const MntPair& mount) {
|
||||||
|
return host_path.starts_with(std::string{fmt::UTF(mount.host_path.u8string()).data});
|
||||||
|
});
|
||||||
|
return it == m_mnt_pairs.end() ? nullptr : &*it;
|
||||||
|
}
|
||||||
|
|
||||||
const MntPair* GetMount(const std::string& guest_path) {
|
const MntPair* GetMount(const std::string& guest_path) {
|
||||||
std::scoped_lock lock{m_mutex};
|
std::scoped_lock lock{m_mutex};
|
||||||
const auto it = std::ranges::find_if(
|
const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) {
|
||||||
m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); });
|
// When doing starts-with check, add a trailing slash to make sure we don't match
|
||||||
|
// against only part of the mount path.
|
||||||
|
return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/");
|
||||||
|
});
|
||||||
return it == m_mnt_pairs.end() ? nullptr : &*it;
|
return it == m_mnt_pairs.end() ? nullptr : &*it;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,6 +95,7 @@ public:
|
|||||||
void DeleteHandle(int d);
|
void DeleteHandle(int d);
|
||||||
File* GetFile(int d);
|
File* GetFile(int d);
|
||||||
File* GetFile(const std::filesystem::path& host_name);
|
File* GetFile(const std::filesystem::path& host_name);
|
||||||
|
int GetFileDescriptor(File* file);
|
||||||
|
|
||||||
void CreateStdHandles();
|
void CreateStdHandles();
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI
|
|||||||
|
|
||||||
int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel,
|
int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel,
|
||||||
u32* pQueueAvailable) {
|
u32* pQueueAvailable) {
|
||||||
LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId);
|
LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
|
|||||||
// In case if `submitDone` is issued we need to block submissions until GPU idle
|
// In case if `submitDone` is issued we need to block submissions until GPU idle
|
||||||
static u32 submission_lock{};
|
static u32 submission_lock{};
|
||||||
std::condition_variable cv_lock{};
|
std::condition_variable cv_lock{};
|
||||||
static std::mutex m_submission{};
|
std::mutex m_submission{};
|
||||||
static u64 frames_submitted{}; // frame counter
|
static u64 frames_submitted{}; // frame counter
|
||||||
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
||||||
static int sdk_version{0};
|
static int sdk_version{0};
|
||||||
|
|
||||||
struct AscQueueInfo {
|
static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
|
||||||
VAddr map_addr;
|
|
||||||
u32* read_addr;
|
|
||||||
u32 ring_size_dw;
|
|
||||||
};
|
|
||||||
static Common::SlotVector<AscQueueInfo> asc_queues{};
|
|
||||||
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
|
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
|
||||||
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;
|
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;
|
||||||
|
|
||||||
@ -493,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||||
|
HLE_TRACE;
|
||||||
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
|
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
|
||||||
|
|
||||||
if (gnm_vqid == 0) {
|
if (gnm_vqid == 0) {
|
||||||
@ -506,11 +502,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto vqid = gnm_vqid - 1;
|
auto vqid = gnm_vqid - 1;
|
||||||
auto& asc_queue = asc_queues[{vqid}];
|
auto& asc_queue = liverpool->asc_queues[{vqid}];
|
||||||
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
|
|
||||||
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
|
const auto& offs_dw = asc_next_offs_dw[vqid];
|
||||||
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
|
|
||||||
const std::span acb_span{acb_ptr, acb_size >> 2u};
|
if (next_offs_dw < offs_dw) {
|
||||||
|
ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
|
||||||
|
const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
|
||||||
|
const std::span acb_span{acb_ptr, acb_size_dw};
|
||||||
|
|
||||||
|
asc_next_offs_dw[vqid] = next_offs_dw;
|
||||||
|
|
||||||
if (DebugState.DumpingCurrentFrame()) {
|
if (DebugState.DumpingCurrentFrame()) {
|
||||||
static auto last_frame_num = -1LL;
|
static auto last_frame_num = -1LL;
|
||||||
@ -545,9 +549,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
liverpool->SubmitAsc(gnm_vqid, acb_span);
|
liverpool->SubmitAsc(gnm_vqid, acb_span);
|
||||||
|
|
||||||
*asc_queue.read_addr += acb_size;
|
|
||||||
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
|
void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
|
||||||
@ -971,7 +972,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void PS4_SYSV_ABI sceGnmFlushGarlic() {
|
void PS4_SYSV_ABI sceGnmFlushGarlic() {
|
||||||
LOG_WARNING(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "(STUBBED) called");
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmGetCoredumpAddress() {
|
int PS4_SYSV_ABI sceGnmGetCoredumpAddress() {
|
||||||
@ -1266,12 +1267,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
|
|||||||
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
|
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
|
const auto vqid =
|
||||||
|
liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
|
||||||
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
|
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
|
||||||
const auto gnm_vqid = vqid.index + 1;
|
const auto gnm_vqid = vqid.index + 1;
|
||||||
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
|
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
|
||||||
gnm_vqid);
|
gnm_vqid);
|
||||||
|
|
||||||
|
const auto& queue = liverpool->asc_queues[vqid];
|
||||||
|
*queue.read_addr = 0u;
|
||||||
|
|
||||||
return gnm_vqid;
|
return gnm_vqid;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1642,7 +1647,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
|
|||||||
|
|
||||||
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
|
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
|
||||||
LOG_TRACE(Lib_GnmDriver, "called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
if (!cmdbuf || size < 0x1E) {
|
if (!cmdbuf || size < 0x1E) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1660,11 +1664,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
|
|||||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
|
||||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
|
||||||
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
|
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
|
||||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
|
||||||
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
|
hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL
|
||||||
|
hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
|
||||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
|
||||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
|
||||||
|
|
||||||
|
// right padding?
|
||||||
WriteTrailingNop<11>(cmdbuf);
|
WriteTrailingNop<11>(cmdbuf);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
@ -2161,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
|||||||
u32* dcb_sizes_in_bytes,
|
u32* dcb_sizes_in_bytes,
|
||||||
const u32* ccb_gpu_addrs[],
|
const u32* ccb_gpu_addrs[],
|
||||||
u32* ccb_sizes_in_bytes) {
|
u32* ccb_sizes_in_bytes) {
|
||||||
|
HLE_TRACE;
|
||||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
||||||
@ -2253,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
|||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||||
|
HLE_TRACE;
|
||||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||||
WaitGpuIdle();
|
WaitGpuIdle();
|
||||||
if (!liverpool->IsGpuIdle()) {
|
if (!liverpool->IsGpuIdle()) {
|
||||||
|
@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
|
|||||||
return sizeof(OrbisKernelDirent);
|
return sizeof(OrbisKernelDirent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||||
|
int dir_entries = 0;
|
||||||
|
|
||||||
|
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||||
|
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||||
|
auto* file = h->GetFile(fd);
|
||||||
|
auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data};
|
||||||
|
auto mount = mnt->GetMountFromHostPath(update_dir_name);
|
||||||
|
auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data};
|
||||||
|
|
||||||
|
size_t pos = update_dir_name.find("-UPDATE");
|
||||||
|
if (pos != std::string::npos) {
|
||||||
|
update_dir_name.erase(pos, 7);
|
||||||
|
auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1);
|
||||||
|
int descriptor;
|
||||||
|
|
||||||
|
auto existent_folder = h->GetFile(update_dir_name);
|
||||||
|
if (!existent_folder) {
|
||||||
|
u32 handle = h->CreateHandle();
|
||||||
|
auto* new_file = h->GetFile(handle);
|
||||||
|
new_file->type = Core::FileSys::FileType::Directory;
|
||||||
|
new_file->m_guest_name = guest_name;
|
||||||
|
new_file->m_host_name = update_dir_name;
|
||||||
|
if (!std::filesystem::is_directory(new_file->m_host_name)) {
|
||||||
|
h->DeleteHandle(handle);
|
||||||
|
return dir_entries;
|
||||||
|
} else {
|
||||||
|
new_file->dirents = GetDirectoryEntries(new_file->m_host_name);
|
||||||
|
new_file->dirents_index = 0;
|
||||||
|
}
|
||||||
|
new_file->is_opened = true;
|
||||||
|
descriptor = h->GetFileDescriptor(new_file);
|
||||||
|
} else {
|
||||||
|
descriptor = h->GetFileDescriptor(existent_folder);
|
||||||
|
}
|
||||||
|
|
||||||
|
dir_entries = GetDents(descriptor, buf, nbytes, basep);
|
||||||
|
if (dir_entries == ORBIS_OK && existent_folder) {
|
||||||
|
existent_folder->dirents_index = 0;
|
||||||
|
file->dirents_index = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dir_entries;
|
||||||
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
|
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
|
||||||
return GetDents(fd, buf, nbytes, nullptr);
|
int a = GetDents(fd, buf, nbytes, nullptr);
|
||||||
|
if (a == ORBIS_OK) {
|
||||||
|
return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
|
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
|
||||||
return GetDents(fd, buf, nbytes, basep);
|
int a = GetDents(fd, buf, nbytes, basep);
|
||||||
|
if (a == ORBIS_OK) {
|
||||||
|
return HandleSeparateUpdateDents(fd, buf, nbytes, basep);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
||||||
|
@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
|
|||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
handle = linker->LoadModule(path, true);
|
handle = linker->LoadModule(path, true);
|
||||||
|
if (handle == -1) {
|
||||||
|
return ORBIS_KERNEL_ERROR_ESRCH;
|
||||||
|
}
|
||||||
auto* module = linker->GetModule(handle);
|
auto* module = linker->GetModule(handle);
|
||||||
linker->RelocateAnyImports(module);
|
linker->RelocateAnyImports(module);
|
||||||
|
|
||||||
|
@ -327,7 +327,8 @@ void PS4_SYSV_ABI sched_yield() {
|
|||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) {
|
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control,
|
||||||
|
void PS4_SYSV_ABI (*init_routine)()) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto state = once_control->state.load();
|
auto state = once_control->state.load();
|
||||||
if (state == PthreadOnceState::Done) {
|
if (state == PthreadOnceState::Done) {
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou
|
|||||||
return std::strncpy(dest, src, count);
|
return std::strncpy(dest, src, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) {
|
||||||
|
#ifdef _WIN64
|
||||||
|
return strncpy_s(dest, destsz, src, count);
|
||||||
|
#else
|
||||||
|
std::strcpy(dest, src);
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) {
|
char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) {
|
||||||
return std::strcat(dest, src);
|
return std::strcat(dest, src);
|
||||||
}
|
}
|
||||||
@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) {
|
|||||||
internal_strlen);
|
internal_strlen);
|
||||||
LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
||||||
internal_strncpy);
|
internal_strncpy);
|
||||||
|
LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
||||||
|
internal_strncpy_s);
|
||||||
LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
||||||
internal_strcat);
|
internal_strcat);
|
||||||
LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
|
||||||
|
@ -38,21 +38,22 @@ void TrophyUI::Finish() {
|
|||||||
void TrophyUI::Draw() {
|
void TrophyUI::Draw() {
|
||||||
const auto& io = GetIO();
|
const auto& io = GetIO();
|
||||||
|
|
||||||
|
float AdjustWidth = io.DisplaySize.x / 1280;
|
||||||
|
float AdjustHeight = io.DisplaySize.y / 720;
|
||||||
const ImVec2 window_size{
|
const ImVec2 window_size{
|
||||||
std::min(io.DisplaySize.x, 250.f),
|
std::min(io.DisplaySize.x, (300 * AdjustWidth)),
|
||||||
std::min(io.DisplaySize.y, 70.f),
|
std::min(io.DisplaySize.y, (70 * AdjustHeight)),
|
||||||
};
|
};
|
||||||
|
|
||||||
SetNextWindowSize(window_size);
|
SetNextWindowSize(window_size);
|
||||||
SetNextWindowCollapsed(false);
|
SetNextWindowCollapsed(false);
|
||||||
SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50));
|
SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight)));
|
||||||
KeepNavHighlight();
|
KeepNavHighlight();
|
||||||
|
|
||||||
if (Begin("Trophy Window", nullptr,
|
if (Begin("Trophy Window", nullptr,
|
||||||
ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings |
|
ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings |
|
||||||
ImGuiWindowFlags_NoInputs)) {
|
ImGuiWindowFlags_NoInputs)) {
|
||||||
if (trophy_icon) {
|
if (trophy_icon) {
|
||||||
Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50));
|
Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight)));
|
||||||
ImGui::SameLine();
|
ImGui::SameLine();
|
||||||
} else {
|
} else {
|
||||||
// placeholder
|
// placeholder
|
||||||
@ -61,6 +62,7 @@ void TrophyUI::Draw() {
|
|||||||
GetColorU32(ImVec4{0.7f}));
|
GetColorU32(ImVec4{0.7f}));
|
||||||
ImGui::Indent(60);
|
ImGui::Indent(60);
|
||||||
}
|
}
|
||||||
|
SetWindowFontScale((1.2 * AdjustHeight));
|
||||||
TextWrapped("Trophy earned!\n%s", trophy_name.c_str());
|
TextWrapped("Trophy earned!\n%s", trophy_name.c_str());
|
||||||
}
|
}
|
||||||
End();
|
End();
|
||||||
|
@ -155,6 +155,9 @@ int PS4_SYSV_ABI scePadGetFeatureReport() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) {
|
int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) {
|
||||||
|
if (userId == -1) {
|
||||||
|
return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE;
|
||||||
|
}
|
||||||
LOG_DEBUG(Lib_Pad, "(DUMMY) called");
|
LOG_DEBUG(Lib_Pad, "(DUMMY) called");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -246,6 +249,9 @@ int PS4_SYSV_ABI scePadMbusTerm() {
|
|||||||
|
|
||||||
int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) {
|
int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) {
|
||||||
LOG_INFO(Lib_Pad, "(DUMMY) called user_id = {} type = {} index = {}", userId, type, index);
|
LOG_INFO(Lib_Pad, "(DUMMY) called user_id = {} type = {} index = {}", userId, type, index);
|
||||||
|
if (userId == -1) {
|
||||||
|
return ORBIS_PAD_ERROR_DEVICE_NO_HANDLE;
|
||||||
|
}
|
||||||
if (Config::getUseSpecialPad()) {
|
if (Config::getUseSpecialPad()) {
|
||||||
if (type != ORBIS_PAD_PORT_TYPE_SPECIAL)
|
if (type != ORBIS_PAD_PORT_TYPE_SPECIAL)
|
||||||
return ORBIS_PAD_ERROR_DEVICE_NOT_CONNECTED;
|
return ORBIS_PAD_ERROR_DEVICE_NOT_CONNECTED;
|
||||||
@ -346,6 +352,9 @@ int PS4_SYSV_ABI scePadReadHistory() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) {
|
int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) {
|
||||||
|
if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) {
|
||||||
|
return ORBIS_PAD_ERROR_INVALID_HANDLE;
|
||||||
|
}
|
||||||
auto* controller = Common::Singleton<Input::GameController>::Instance();
|
auto* controller = Common::Singleton<Input::GameController>::Instance();
|
||||||
int connectedCount = 0;
|
int connectedCount = 0;
|
||||||
bool isConnected = false;
|
bool isConnected = false;
|
||||||
|
@ -137,7 +137,7 @@ s32 PS4_SYSV_ABI scePlayGoGetLanguageMask(OrbisPlayGoHandle handle,
|
|||||||
|
|
||||||
s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds,
|
s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoChunkId* chunkIds,
|
||||||
uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) {
|
uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) {
|
||||||
LOG_INFO(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
|
LOG_DEBUG(Lib_PlayGo, "called handle = {}, chunkIds = {}, numberOfEntries = {}", handle,
|
||||||
*chunkIds, numberOfEntries);
|
*chunkIds, numberOfEntries);
|
||||||
|
|
||||||
if (handle != PlaygoHandle) {
|
if (handle != PlaygoHandle) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <set>
|
||||||
#include <fmt/core.h>
|
#include <fmt/core.h>
|
||||||
|
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
@ -100,15 +101,17 @@ Emulator::Emulator() {
|
|||||||
|
|
||||||
Emulator::~Emulator() {
|
Emulator::~Emulator() {
|
||||||
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||||
Config::save(config_dir / "config.toml");
|
Config::saveMainWindow(config_dir / "config.toml");
|
||||||
}
|
}
|
||||||
|
|
||||||
void Emulator::Run(const std::filesystem::path& file) {
|
void Emulator::Run(const std::filesystem::path& file) {
|
||||||
|
|
||||||
// Use the eboot from the separated updates folder if it's there
|
// Use the eboot from the separated updates folder if it's there
|
||||||
std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE");
|
std::filesystem::path game_patch_folder = file.parent_path();
|
||||||
bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys");
|
game_patch_folder += "-UPDATE";
|
||||||
std::filesystem::path eboot_path = use_game_patch ? game_patch_folder / file.filename() : file;
|
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
|
||||||
|
? game_patch_folder / file.filename()
|
||||||
|
: file;
|
||||||
|
|
||||||
// Applications expect to be run from /app0 so mount the file's parent path as app0.
|
// Applications expect to be run from /app0 so mount the file's parent path as app0.
|
||||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||||
@ -226,19 +229,36 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||||||
LoadSystemModules(eboot_path, game_info.game_serial);
|
LoadSystemModules(eboot_path, game_info.game_serial);
|
||||||
|
|
||||||
// Load all prx from game's sce_module folder
|
// Load all prx from game's sce_module folder
|
||||||
std::filesystem::path sce_module_folder = file.parent_path() / "sce_module";
|
std::vector<std::filesystem::path> modules_to_load;
|
||||||
if (std::filesystem::is_directory(sce_module_folder)) {
|
std::filesystem::path game_module_folder = file.parent_path() / "sce_module";
|
||||||
for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) {
|
if (std::filesystem::is_directory(game_module_folder)) {
|
||||||
std::filesystem::path module_path = entry.path();
|
for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) {
|
||||||
std::filesystem::path update_module_path =
|
if (entry.is_regular_file()) {
|
||||||
eboot_path.parent_path() / "sce_module" / entry.path().filename();
|
modules_to_load.push_back(entry.path());
|
||||||
if (std::filesystem::exists(update_module_path) && use_game_patch) {
|
|
||||||
module_path = update_module_path;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load all prx from separate update's sce_module folder
|
||||||
|
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
|
||||||
|
if (std::filesystem::is_directory(update_module_folder)) {
|
||||||
|
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
|
||||||
|
auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(),
|
||||||
|
[&entry](const std::filesystem::path& p) {
|
||||||
|
return p.filename() == entry.path().filename();
|
||||||
|
});
|
||||||
|
if (it != modules_to_load.end()) {
|
||||||
|
*it = entry.path();
|
||||||
|
} else {
|
||||||
|
modules_to_load.push_back(entry.path());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& module_path : modules_to_load) {
|
||||||
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
|
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
|
||||||
linker->LoadModule(module_path);
|
linker->LoadModule(module_path);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef ENABLE_DISCORD_RPC
|
#ifdef ENABLE_DISCORD_RPC
|
||||||
// Discord RPC
|
// Discord RPC
|
||||||
@ -266,7 +286,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
|
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
|
||||||
constexpr std::array<SysModules, 10> ModulesToLoad{
|
constexpr std::array<SysModules, 13> ModulesToLoad{
|
||||||
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
|
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
|
||||||
{"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber},
|
{"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber},
|
||||||
{"libSceUlt.sprx", nullptr},
|
{"libSceUlt.sprx", nullptr},
|
||||||
@ -276,7 +296,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string
|
|||||||
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
|
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
|
||||||
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
|
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
|
||||||
{"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc},
|
{"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc},
|
||||||
{"libSceCesCs.sprx", nullptr}}};
|
{"libSceCesCs.sprx", nullptr},
|
||||||
|
{"libSceFont.sprx", nullptr},
|
||||||
|
{"libSceFontFt.sprx", nullptr},
|
||||||
|
{"libSceFreeTypeOt.sprx", nullptr}}};
|
||||||
|
|
||||||
std::vector<std::filesystem::path> found_modules;
|
std::vector<std::filesystem::path> found_modules;
|
||||||
const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir);
|
const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir);
|
||||||
|
@ -139,7 +139,7 @@ void GameListFrame::PopulateGameList() {
|
|||||||
formattedPlayTime = formattedPlayTime.trimmed();
|
formattedPlayTime = formattedPlayTime.trimmed();
|
||||||
m_game_info->m_games[i].play_time = playTime.toStdString();
|
m_game_info->m_games[i].play_time = playTime.toStdString();
|
||||||
if (formattedPlayTime.isEmpty()) {
|
if (formattedPlayTime.isEmpty()) {
|
||||||
SetTableItem(i, 8, "0");
|
SetTableItem(i, 8, QString("%1s").arg(seconds));
|
||||||
} else {
|
} else {
|
||||||
SetTableItem(i, 8, formattedPlayTime);
|
SetTableItem(i, 8, formattedPlayTime);
|
||||||
}
|
}
|
||||||
|
@ -122,11 +122,11 @@ public:
|
|||||||
|
|
||||||
if (selected == &openSfoViewer) {
|
if (selected == &openSfoViewer) {
|
||||||
PSF psf;
|
PSF psf;
|
||||||
QString game_update_path;
|
|
||||||
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
|
|
||||||
std::filesystem::path game_folder_path = m_games[itemID].path;
|
std::filesystem::path game_folder_path = m_games[itemID].path;
|
||||||
if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
|
std::filesystem::path game_update_path = game_folder_path;
|
||||||
game_folder_path = Common::FS::PathFromQString(game_update_path);
|
game_update_path += "UPDATE";
|
||||||
|
if (std::filesystem::exists(game_update_path)) {
|
||||||
|
game_folder_path = game_update_path;
|
||||||
}
|
}
|
||||||
if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) {
|
if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) {
|
||||||
int rows = psf.GetEntries().size();
|
int rows = psf.GetEntries().size();
|
||||||
@ -320,21 +320,17 @@ public:
|
|||||||
bool error = false;
|
bool error = false;
|
||||||
QString folder_path, game_update_path, dlc_path;
|
QString folder_path, game_update_path, dlc_path;
|
||||||
Common::FS::PathToQString(folder_path, m_games[itemID].path);
|
Common::FS::PathToQString(folder_path, m_games[itemID].path);
|
||||||
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
|
game_update_path = folder_path + "-UPDATE";
|
||||||
Common::FS::PathToQString(
|
Common::FS::PathToQString(
|
||||||
dlc_path, Config::getAddonInstallDir() /
|
dlc_path, Config::getAddonInstallDir() /
|
||||||
Common::FS::PathFromQString(folder_path).parent_path().filename());
|
Common::FS::PathFromQString(folder_path).parent_path().filename());
|
||||||
QString message_type = tr("Game");
|
QString message_type = tr("Game");
|
||||||
|
|
||||||
if (selected == deleteUpdate) {
|
if (selected == deleteUpdate) {
|
||||||
if (!Config::getSeparateUpdateEnabled()) {
|
if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
|
||||||
QMessageBox::critical(nullptr, tr("Error"),
|
QMessageBox::critical(
|
||||||
QString(tr("requiresEnableSeparateUpdateFolder_MSG")));
|
nullptr, tr("Error"),
|
||||||
error = true;
|
QString(tr("This game has no separate update to delete!")));
|
||||||
} else if (!std::filesystem::exists(
|
|
||||||
Common::FS::PathFromQString(game_update_path))) {
|
|
||||||
QMessageBox::critical(nullptr, tr("Error"),
|
|
||||||
QString(tr("This game has no update to delete!")));
|
|
||||||
error = true;
|
error = true;
|
||||||
} else {
|
} else {
|
||||||
folder_path = game_update_path;
|
folder_path = game_update_path;
|
||||||
|
@ -35,7 +35,7 @@ MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWi
|
|||||||
MainWindow::~MainWindow() {
|
MainWindow::~MainWindow() {
|
||||||
SaveWindowState();
|
SaveWindowState();
|
||||||
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||||
Config::save(config_dir / "config.toml");
|
Config::saveMainWindow(config_dir / "config.toml");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MainWindow::Init() {
|
bool MainWindow::Init() {
|
||||||
@ -111,6 +111,7 @@ void MainWindow::CreateActions() {
|
|||||||
m_theme_act_group->addAction(ui->setThemeGreen);
|
m_theme_act_group->addAction(ui->setThemeGreen);
|
||||||
m_theme_act_group->addAction(ui->setThemeBlue);
|
m_theme_act_group->addAction(ui->setThemeBlue);
|
||||||
m_theme_act_group->addAction(ui->setThemeViolet);
|
m_theme_act_group->addAction(ui->setThemeViolet);
|
||||||
|
m_theme_act_group->addAction(ui->setThemeGruvbox);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MainWindow::AddUiWidgets() {
|
void MainWindow::AddUiWidgets() {
|
||||||
@ -542,6 +543,14 @@ void MainWindow::CreateConnects() {
|
|||||||
isIconBlack = false;
|
isIconBlack = false;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() {
|
||||||
|
m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar);
|
||||||
|
Config::setMainWindowTheme(static_cast<int>(Theme::Gruvbox));
|
||||||
|
if (isIconBlack) {
|
||||||
|
SetUiIcons(false);
|
||||||
|
isIconBlack = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void MainWindow::StartGame() {
|
void MainWindow::StartGame() {
|
||||||
@ -915,6 +924,11 @@ void MainWindow::SetLastUsedTheme() {
|
|||||||
isIconBlack = false;
|
isIconBlack = false;
|
||||||
SetUiIcons(false);
|
SetUiIcons(false);
|
||||||
break;
|
break;
|
||||||
|
case Theme::Gruvbox:
|
||||||
|
ui->setThemeGruvbox->setChecked(true);
|
||||||
|
isIconBlack = false;
|
||||||
|
SetUiIcons(false);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1008,7 +1022,7 @@ void MainWindow::AddRecentFiles(QString filePath) {
|
|||||||
}
|
}
|
||||||
Config::setRecentFiles(vec);
|
Config::setRecentFiles(vec);
|
||||||
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||||
Config::save(config_dir / "config.toml");
|
Config::saveMainWindow(config_dir / "config.toml");
|
||||||
CreateRecentGameActions(); // Refresh the QActions.
|
CreateRecentGameActions(); // Refresh the QActions.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,14 +8,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
|
|
||||||
switch (theme) {
|
switch (theme) {
|
||||||
case Theme::Dark:
|
case Theme::Dark:
|
||||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
mw_searchbar->setStyleSheet(
|
||||||
"color: #ffffff;" // White text
|
"QLineEdit {"
|
||||||
"border: 2px solid #ffffff;" // White border
|
"background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; "
|
||||||
"padding: 5px;");
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #2A82DA; }");
|
||||||
themePalette.setColor(QPalette::Window, QColor(50, 50, 50));
|
themePalette.setColor(QPalette::Window, QColor(50, 50, 50));
|
||||||
themePalette.setColor(QPalette::WindowText, Qt::white);
|
themePalette.setColor(QPalette::WindowText, Qt::white);
|
||||||
themePalette.setColor(QPalette::Base, QColor(20, 20, 20));
|
themePalette.setColor(QPalette::Base, QColor(20, 20, 20));
|
||||||
themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25));
|
|
||||||
themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53));
|
themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53));
|
||||||
themePalette.setColor(QPalette::ToolTipBase, Qt::white);
|
themePalette.setColor(QPalette::ToolTipBase, Qt::white);
|
||||||
themePalette.setColor(QPalette::ToolTipText, Qt::white);
|
themePalette.setColor(QPalette::ToolTipText, Qt::white);
|
||||||
@ -28,12 +29,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
themePalette.setColor(QPalette::HighlightedText, Qt::black);
|
themePalette.setColor(QPalette::HighlightedText, Qt::black);
|
||||||
qApp->setPalette(themePalette);
|
qApp->setPalette(themePalette);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Theme::Light:
|
case Theme::Light:
|
||||||
mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background
|
mw_searchbar->setStyleSheet(
|
||||||
"color: #000000;" // Black text
|
"QLineEdit {"
|
||||||
"border: 2px solid #000000;" // Black border
|
"background-color: #ffffff; color: #000000; border: 1px solid #000000; "
|
||||||
"padding: 5px;");
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #2A82DA; }");
|
||||||
themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray
|
themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray
|
||||||
themePalette.setColor(QPalette::WindowText, Qt::black); // Black
|
themePalette.setColor(QPalette::WindowText, Qt::black); // Black
|
||||||
themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish
|
themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish
|
||||||
@ -48,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
themePalette.setColor(QPalette::HighlightedText, Qt::white); // White
|
themePalette.setColor(QPalette::HighlightedText, Qt::white); // White
|
||||||
qApp->setPalette(themePalette);
|
qApp->setPalette(themePalette);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Theme::Green:
|
case Theme::Green:
|
||||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
mw_searchbar->setStyleSheet(
|
||||||
"color: #ffffff;" // White text
|
"QLineEdit {"
|
||||||
"border: 2px solid #ffffff;" // White border
|
"background-color: #192819; color: #ffffff; border: 1px solid #ffffff; "
|
||||||
"padding: 5px;");
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #2A82DA; }");
|
||||||
themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green background
|
themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green background
|
||||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||||
themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base
|
themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base
|
||||||
@ -68,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links
|
themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links
|
||||||
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
||||||
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
||||||
|
|
||||||
qApp->setPalette(themePalette);
|
qApp->setPalette(themePalette);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Theme::Blue:
|
case Theme::Blue:
|
||||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
mw_searchbar->setStyleSheet(
|
||||||
"color: #ffffff;" // White text
|
"QLineEdit {"
|
||||||
"border: 2px solid #ffffff;" // White border
|
"background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; "
|
||||||
"padding: 5px;");
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #2A82DA; }");
|
||||||
themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background
|
themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background
|
||||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||||
themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base
|
themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base
|
||||||
@ -94,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
|
|
||||||
qApp->setPalette(themePalette);
|
qApp->setPalette(themePalette);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Theme::Violet:
|
case Theme::Violet:
|
||||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
mw_searchbar->setStyleSheet(
|
||||||
"color: #ffffff;" // White text
|
"QLineEdit {"
|
||||||
"border: 2px solid #ffffff;" // White border
|
"background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; "
|
||||||
"padding: 5px;");
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #2A82DA; }");
|
||||||
themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background
|
themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background
|
||||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||||
themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base
|
themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base
|
||||||
@ -115,6 +119,28 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
|||||||
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
||||||
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
||||||
|
|
||||||
|
qApp->setPalette(themePalette);
|
||||||
|
break;
|
||||||
|
case Theme::Gruvbox:
|
||||||
|
mw_searchbar->setStyleSheet(
|
||||||
|
"QLineEdit {"
|
||||||
|
"background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; "
|
||||||
|
"border-radius: 4px; padding: 5px; }"
|
||||||
|
"QLineEdit:focus {"
|
||||||
|
"border: 1px solid #83A598; }");
|
||||||
|
themePalette.setColor(QPalette::Window, QColor(29, 32, 33));
|
||||||
|
themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215));
|
||||||
|
themePalette.setColor(QPalette::Base, QColor(29, 32, 33));
|
||||||
|
themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47));
|
||||||
|
themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215));
|
||||||
|
themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215));
|
||||||
|
themePalette.setColor(QPalette::Text, QColor(249, 245, 215));
|
||||||
|
themePalette.setColor(QPalette::Button, QColor(40, 40, 40));
|
||||||
|
themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215));
|
||||||
|
themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52));
|
||||||
|
themePalette.setColor(QPalette::Link, QColor(131, 165, 152));
|
||||||
|
themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152));
|
||||||
|
themePalette.setColor(QPalette::HighlightedText, Qt::black);
|
||||||
qApp->setPalette(themePalette);
|
qApp->setPalette(themePalette);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -7,13 +7,7 @@
|
|||||||
#include <QLineEdit>
|
#include <QLineEdit>
|
||||||
#include <QWidget>
|
#include <QWidget>
|
||||||
|
|
||||||
enum class Theme : int {
|
enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox };
|
||||||
Dark,
|
|
||||||
Light,
|
|
||||||
Green,
|
|
||||||
Blue,
|
|
||||||
Violet,
|
|
||||||
};
|
|
||||||
|
|
||||||
class WindowThemes : public QObject {
|
class WindowThemes : public QObject {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
@ -36,6 +36,7 @@ public:
|
|||||||
QAction* setThemeGreen;
|
QAction* setThemeGreen;
|
||||||
QAction* setThemeBlue;
|
QAction* setThemeBlue;
|
||||||
QAction* setThemeViolet;
|
QAction* setThemeViolet;
|
||||||
|
QAction* setThemeGruvbox;
|
||||||
QWidget* centralWidget;
|
QWidget* centralWidget;
|
||||||
QLineEdit* mw_searchbar;
|
QLineEdit* mw_searchbar;
|
||||||
QPushButton* playButton;
|
QPushButton* playButton;
|
||||||
@ -158,6 +159,9 @@ public:
|
|||||||
setThemeViolet = new QAction(MainWindow);
|
setThemeViolet = new QAction(MainWindow);
|
||||||
setThemeViolet->setObjectName("setThemeViolet");
|
setThemeViolet->setObjectName("setThemeViolet");
|
||||||
setThemeViolet->setCheckable(true);
|
setThemeViolet->setCheckable(true);
|
||||||
|
setThemeGruvbox = new QAction(MainWindow);
|
||||||
|
setThemeGruvbox->setObjectName("setThemeGruvbox");
|
||||||
|
setThemeGruvbox->setCheckable(true);
|
||||||
centralWidget = new QWidget(MainWindow);
|
centralWidget = new QWidget(MainWindow);
|
||||||
centralWidget->setObjectName("centralWidget");
|
centralWidget->setObjectName("centralWidget");
|
||||||
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth());
|
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth());
|
||||||
@ -282,6 +286,7 @@ public:
|
|||||||
menuThemes->addAction(setThemeGreen);
|
menuThemes->addAction(setThemeGreen);
|
||||||
menuThemes->addAction(setThemeBlue);
|
menuThemes->addAction(setThemeBlue);
|
||||||
menuThemes->addAction(setThemeViolet);
|
menuThemes->addAction(setThemeViolet);
|
||||||
|
menuThemes->addAction(setThemeGruvbox);
|
||||||
menuGame_List_Icons->addAction(setIconSizeTinyAct);
|
menuGame_List_Icons->addAction(setIconSizeTinyAct);
|
||||||
menuGame_List_Icons->addAction(setIconSizeSmallAct);
|
menuGame_List_Icons->addAction(setIconSizeSmallAct);
|
||||||
menuGame_List_Icons->addAction(setIconSizeMediumAct);
|
menuGame_List_Icons->addAction(setIconSizeMediumAct);
|
||||||
@ -368,6 +373,7 @@ public:
|
|||||||
setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr));
|
setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr));
|
||||||
setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr));
|
setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr));
|
||||||
setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr));
|
setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr));
|
||||||
|
setThemeGruvbox->setText("Gruvbox");
|
||||||
toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr));
|
toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr));
|
||||||
} // retranslateUi
|
} // retranslateUi
|
||||||
};
|
};
|
||||||
|
@ -12,12 +12,13 @@
|
|||||||
#ifdef ENABLE_UPDATER
|
#ifdef ENABLE_UPDATER
|
||||||
#include "check_update.h"
|
#include "check_update.h"
|
||||||
#endif
|
#endif
|
||||||
|
#include <toml.hpp>
|
||||||
#include "common/logging/backend.h"
|
#include "common/logging/backend.h"
|
||||||
#include "common/logging/filter.h"
|
#include "common/logging/filter.h"
|
||||||
|
#include "common/logging/formatter.h"
|
||||||
#include "main_window.h"
|
#include "main_window.h"
|
||||||
#include "settings_dialog.h"
|
#include "settings_dialog.h"
|
||||||
#include "ui_settings_dialog.h"
|
#include "ui_settings_dialog.h"
|
||||||
|
|
||||||
QStringList languageNames = {"Arabic",
|
QStringList languageNames = {"Arabic",
|
||||||
"Czech",
|
"Czech",
|
||||||
"Danish",
|
"Danish",
|
||||||
@ -94,13 +95,18 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
|
|||||||
connect(ui->buttonBox, &QDialogButtonBox::clicked, this,
|
connect(ui->buttonBox, &QDialogButtonBox::clicked, this,
|
||||||
[this, config_dir](QAbstractButton* button) {
|
[this, config_dir](QAbstractButton* button) {
|
||||||
if (button == ui->buttonBox->button(QDialogButtonBox::Save)) {
|
if (button == ui->buttonBox->button(QDialogButtonBox::Save)) {
|
||||||
|
UpdateSettings();
|
||||||
Config::save(config_dir / "config.toml");
|
Config::save(config_dir / "config.toml");
|
||||||
QWidget::close();
|
QWidget::close();
|
||||||
} else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) {
|
} else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) {
|
||||||
|
UpdateSettings();
|
||||||
Config::save(config_dir / "config.toml");
|
Config::save(config_dir / "config.toml");
|
||||||
} else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) {
|
} else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) {
|
||||||
Config::setDefaultValues();
|
Config::setDefaultValues();
|
||||||
|
Config::save(config_dir / "config.toml");
|
||||||
LoadValuesFromConfig();
|
LoadValuesFromConfig();
|
||||||
|
} else if (button == ui->buttonBox->button(QDialogButtonBox::Close)) {
|
||||||
|
ResetInstallFolders();
|
||||||
}
|
}
|
||||||
if (Common::Log::IsActive()) {
|
if (Common::Log::IsActive()) {
|
||||||
Common::Log::Filter filter;
|
Common::Log::Filter filter;
|
||||||
@ -119,35 +125,6 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
|
|||||||
|
|
||||||
// GENERAL TAB
|
// GENERAL TAB
|
||||||
{
|
{
|
||||||
connect(ui->userNameLineEdit, &QLineEdit::textChanged, this,
|
|
||||||
[](const QString& text) { Config::setUserName(text.toStdString()); });
|
|
||||||
|
|
||||||
connect(ui->consoleLanguageComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged),
|
|
||||||
this, [](int index) {
|
|
||||||
if (index >= 0 && index < languageIndexes.size()) {
|
|
||||||
int languageCode = languageIndexes[index];
|
|
||||||
Config::setLanguage(languageCode);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setFullscreenMode(val); });
|
|
||||||
|
|
||||||
connect(ui->separateUpdatesCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setSeparateUpdateEnabled(val); });
|
|
||||||
|
|
||||||
connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setShowSplash(val); });
|
|
||||||
|
|
||||||
connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setNeoMode(val); });
|
|
||||||
|
|
||||||
connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this,
|
|
||||||
[](const QString& text) { Config::setLogType(text.toStdString()); });
|
|
||||||
|
|
||||||
connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this,
|
|
||||||
[](const QString& text) { Config::setLogFilter(text.toStdString()); });
|
|
||||||
|
|
||||||
#ifdef ENABLE_UPDATER
|
#ifdef ENABLE_UPDATER
|
||||||
connect(ui->updateCheckBox, &QCheckBox::stateChanged, this,
|
connect(ui->updateCheckBox, &QCheckBox::stateChanged, this,
|
||||||
[](int state) { Config::setAutoUpdate(state == Qt::Checked); });
|
[](int state) { Config::setAutoUpdate(state == Qt::Checked); });
|
||||||
@ -163,74 +140,12 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
|
|||||||
ui->updaterGroupBox->setVisible(false);
|
ui->updaterGroupBox->setVisible(false);
|
||||||
ui->GUIgroupBox->setMaximumSize(265, 16777215);
|
ui->GUIgroupBox->setMaximumSize(265, 16777215);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
connect(ui->playBGMCheckBox, &QCheckBox::stateChanged, this, [](int val) {
|
|
||||||
Config::setPlayBGM(val);
|
|
||||||
if (val == Qt::Unchecked) {
|
|
||||||
BackgroundMusicPlayer::getInstance().stopMusic();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
connect(ui->BGMVolumeSlider, &QSlider::valueChanged, this, [](float val) {
|
|
||||||
Config::setBGMvolume(val);
|
|
||||||
BackgroundMusicPlayer::getInstance().setVolume(val);
|
|
||||||
});
|
|
||||||
|
|
||||||
#ifdef ENABLE_DISCORD_RPC
|
|
||||||
connect(ui->discordRPCCheckbox, &QCheckBox::stateChanged, this, [](int val) {
|
|
||||||
Config::setEnableDiscordRPC(val);
|
|
||||||
auto* rpc = Common::Singleton<DiscordRPCHandler::RPC>::Instance();
|
|
||||||
if (val == Qt::Checked) {
|
|
||||||
rpc->init();
|
|
||||||
rpc->setStatusIdling();
|
|
||||||
} else {
|
|
||||||
rpc->shutdown();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Input TAB
|
// Input TAB
|
||||||
{
|
{
|
||||||
connect(ui->hideCursorComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
|
connect(ui->hideCursorComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
|
||||||
[this](s16 index) {
|
[this](s16 index) { OnCursorStateChanged(index); });
|
||||||
Config::setCursorState(index);
|
|
||||||
OnCursorStateChanged(index);
|
|
||||||
});
|
|
||||||
|
|
||||||
connect(ui->idleTimeoutSpinBox, &QSpinBox::valueChanged, this,
|
|
||||||
[](int index) { Config::setCursorHideTimeout(index); });
|
|
||||||
|
|
||||||
connect(ui->backButtonBehaviorComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged),
|
|
||||||
this, [this](int index) {
|
|
||||||
if (index >= 0 && index < ui->backButtonBehaviorComboBox->count()) {
|
|
||||||
QString data = ui->backButtonBehaviorComboBox->itemData(index).toString();
|
|
||||||
Config::setBackButtonBehavior(data.toStdString());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// GPU TAB
|
|
||||||
{
|
|
||||||
// First options is auto selection -1, so gpuId on the GUI will always have to subtract 1
|
|
||||||
// when setting and add 1 when getting to select the correct gpu in Qt
|
|
||||||
connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this,
|
|
||||||
[](int index) { Config::setGpuId(index - 1); });
|
|
||||||
|
|
||||||
connect(ui->widthSpinBox, &QSpinBox::valueChanged, this,
|
|
||||||
[](int val) { Config::setScreenWidth(val); });
|
|
||||||
|
|
||||||
connect(ui->heightSpinBox, &QSpinBox::valueChanged, this,
|
|
||||||
[](int val) { Config::setScreenHeight(val); });
|
|
||||||
|
|
||||||
connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this,
|
|
||||||
[](int val) { Config::setVblankDiv(val); });
|
|
||||||
|
|
||||||
connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setDumpShaders(val); });
|
|
||||||
|
|
||||||
connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setNullGpu(val); });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// PATH TAB
|
// PATH TAB
|
||||||
@ -262,21 +177,6 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// DEBUG TAB
|
|
||||||
{
|
|
||||||
connect(ui->debugDump, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setDebugDump(val); });
|
|
||||||
|
|
||||||
connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setVkValidation(val); });
|
|
||||||
|
|
||||||
connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setVkSyncValidation(val); });
|
|
||||||
|
|
||||||
connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this,
|
|
||||||
[](int val) { Config::setRdocEnabled(val); });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Descriptions
|
// Descriptions
|
||||||
{
|
{
|
||||||
// General
|
// General
|
||||||
@ -323,40 +223,69 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidge
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SettingsDialog::LoadValuesFromConfig() {
|
void SettingsDialog::LoadValuesFromConfig() {
|
||||||
ui->consoleLanguageComboBox->setCurrentIndex(
|
|
||||||
std::distance(
|
|
||||||
languageIndexes.begin(),
|
|
||||||
std::find(languageIndexes.begin(), languageIndexes.end(), Config::GetLanguage())) %
|
|
||||||
languageIndexes.size());
|
|
||||||
ui->emulatorLanguageComboBox->setCurrentIndex(languages[Config::getEmulatorLanguage()]);
|
|
||||||
ui->hideCursorComboBox->setCurrentIndex(Config::getCursorState());
|
|
||||||
OnCursorStateChanged(Config::getCursorState());
|
|
||||||
ui->idleTimeoutSpinBox->setValue(Config::getCursorHideTimeout());
|
|
||||||
ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1);
|
|
||||||
ui->widthSpinBox->setValue(Config::getScreenWidth());
|
|
||||||
ui->heightSpinBox->setValue(Config::getScreenHeight());
|
|
||||||
ui->vblankSpinBox->setValue(Config::vblankDiv());
|
|
||||||
ui->dumpShadersCheckBox->setChecked(Config::dumpShaders());
|
|
||||||
ui->nullGpuCheckBox->setChecked(Config::nullGpu());
|
|
||||||
ui->playBGMCheckBox->setChecked(Config::getPlayBGM());
|
|
||||||
ui->BGMVolumeSlider->setValue((Config::getBGMvolume()));
|
|
||||||
ui->discordRPCCheckbox->setChecked(Config::getEnableDiscordRPC());
|
|
||||||
ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode());
|
|
||||||
ui->separateUpdatesCheckBox->setChecked(Config::getSeparateUpdateEnabled());
|
|
||||||
ui->showSplashCheckBox->setChecked(Config::showSplash());
|
|
||||||
ui->ps4proCheckBox->setChecked(Config::isNeoMode());
|
|
||||||
ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType()));
|
|
||||||
ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter()));
|
|
||||||
ui->userNameLineEdit->setText(QString::fromStdString(Config::getUserName()));
|
|
||||||
|
|
||||||
ui->debugDump->setChecked(Config::debugDump());
|
std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||||
ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled());
|
std::error_code error;
|
||||||
ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled());
|
if (!std::filesystem::exists(userdir / "config.toml", error)) {
|
||||||
ui->rdocCheckBox->setChecked(Config::isRdocEnabled());
|
Config::load(userdir / "config.toml");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
std::ifstream ifs;
|
||||||
|
ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
||||||
|
const toml::value data = toml::parse(userdir / "config.toml");
|
||||||
|
} catch (std::exception& ex) {
|
||||||
|
fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const toml::value data = toml::parse(userdir / "config.toml");
|
||||||
|
const QVector<int> languageIndexes = {21, 23, 14, 6, 18, 1, 12, 22, 2, 4, 25, 24, 29, 5, 0, 9,
|
||||||
|
15, 16, 17, 7, 26, 8, 11, 20, 3, 13, 27, 10, 19, 30, 28};
|
||||||
|
|
||||||
|
ui->consoleLanguageComboBox->setCurrentIndex(
|
||||||
|
std::distance(languageIndexes.begin(),
|
||||||
|
std::find(languageIndexes.begin(), languageIndexes.end(),
|
||||||
|
toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) %
|
||||||
|
languageIndexes.size());
|
||||||
|
ui->emulatorLanguageComboBox->setCurrentIndex(
|
||||||
|
languages[toml::find_or<std::string>(data, "GUI", "emulatorLanguage", "en")]);
|
||||||
|
ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1));
|
||||||
|
OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1));
|
||||||
|
ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5));
|
||||||
|
// First options is auto selection -1, so gpuId on the GUI will always have to subtract 1
|
||||||
|
// when setting and add 1 when getting to select the correct gpu in Qt
|
||||||
|
ui->graphicsAdapterBox->setCurrentIndex(toml::find_or<int>(data, "Vulkan", "gpuId", -1) + 1);
|
||||||
|
ui->widthSpinBox->setValue(toml::find_or<int>(data, "GPU", "screenWidth", 1280));
|
||||||
|
ui->heightSpinBox->setValue(toml::find_or<int>(data, "GPU", "screenHeight", 720));
|
||||||
|
ui->vblankSpinBox->setValue(toml::find_or<int>(data, "GPU", "vblankDivider", 1));
|
||||||
|
ui->dumpShadersCheckBox->setChecked(toml::find_or<bool>(data, "GPU", "dumpShaders", false));
|
||||||
|
ui->nullGpuCheckBox->setChecked(toml::find_or<bool>(data, "GPU", "nullGpu", false));
|
||||||
|
ui->playBGMCheckBox->setChecked(toml::find_or<bool>(data, "General", "playBGM", false));
|
||||||
|
ui->BGMVolumeSlider->setValue(toml::find_or<int>(data, "General", "BGMvolume", 50));
|
||||||
|
ui->discordRPCCheckbox->setChecked(
|
||||||
|
toml::find_or<bool>(data, "General", "enableDiscordRPC", true));
|
||||||
|
ui->fullscreenCheckBox->setChecked(toml::find_or<bool>(data, "General", "Fullscreen", false));
|
||||||
|
ui->separateUpdatesCheckBox->setChecked(
|
||||||
|
toml::find_or<bool>(data, "General", "separateUpdateEnabled", false));
|
||||||
|
ui->showSplashCheckBox->setChecked(toml::find_or<bool>(data, "General", "showSplash", false));
|
||||||
|
ui->ps4proCheckBox->setChecked(toml::find_or<bool>(data, "General", "isPS4Pro", false));
|
||||||
|
ui->logTypeComboBox->setCurrentText(
|
||||||
|
QString::fromStdString(toml::find_or<std::string>(data, "General", "logType", "async")));
|
||||||
|
ui->logFilterLineEdit->setText(
|
||||||
|
QString::fromStdString(toml::find_or<std::string>(data, "General", "logFilter", "")));
|
||||||
|
ui->userNameLineEdit->setText(
|
||||||
|
QString::fromStdString(toml::find_or<std::string>(data, "General", "userName", "shadPS4")));
|
||||||
|
ui->debugDump->setChecked(toml::find_or<bool>(data, "Debug", "DebugDump", false));
|
||||||
|
ui->vkValidationCheckBox->setChecked(toml::find_or<bool>(data, "Vulkan", "validation", false));
|
||||||
|
ui->vkSyncValidationCheckBox->setChecked(
|
||||||
|
toml::find_or<bool>(data, "Vulkan", "validation_sync", false));
|
||||||
|
ui->rdocCheckBox->setChecked(toml::find_or<bool>(data, "Vulkan", "rdocEnable", false));
|
||||||
|
|
||||||
#ifdef ENABLE_UPDATER
|
#ifdef ENABLE_UPDATER
|
||||||
ui->updateCheckBox->setChecked(Config::autoUpdate());
|
ui->updateCheckBox->setChecked(toml::find_or<bool>(data, "General", "autoUpdate", false));
|
||||||
std::string updateChannel = Config::getUpdateChannel();
|
std::string updateChannel = toml::find_or<std::string>(data, "General", "updateChannel", "");
|
||||||
if (updateChannel != "Release" && updateChannel != "Nightly") {
|
if (updateChannel != "Release" && updateChannel != "Nightly") {
|
||||||
if (Common::isRelease) {
|
if (Common::isRelease) {
|
||||||
updateChannel = "Release";
|
updateChannel = "Release";
|
||||||
@ -367,18 +296,13 @@ void SettingsDialog::LoadValuesFromConfig() {
|
|||||||
ui->updateComboBox->setCurrentText(QString::fromStdString(updateChannel));
|
ui->updateComboBox->setCurrentText(QString::fromStdString(updateChannel));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (const auto& dir : Config::getGameInstallDirs()) {
|
QString backButtonBehavior = QString::fromStdString(
|
||||||
QString path_string;
|
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
|
||||||
Common::FS::PathToQString(path_string, dir);
|
|
||||||
QListWidgetItem* item = new QListWidgetItem(path_string);
|
|
||||||
ui->gameFoldersListWidget->addItem(item);
|
|
||||||
}
|
|
||||||
|
|
||||||
QString backButtonBehavior = QString::fromStdString(Config::getBackButtonBehavior());
|
|
||||||
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
|
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
|
||||||
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
|
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
|
||||||
|
|
||||||
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
|
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
|
||||||
|
ResetInstallFolders();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SettingsDialog::InitializeEmulatorLanguages() {
|
void SettingsDialog::InitializeEmulatorLanguages() {
|
||||||
@ -554,3 +478,75 @@ bool SettingsDialog::eventFilter(QObject* obj, QEvent* event) {
|
|||||||
}
|
}
|
||||||
return QDialog::eventFilter(obj, event);
|
return QDialog::eventFilter(obj, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SettingsDialog::UpdateSettings() {
|
||||||
|
|
||||||
|
const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"};
|
||||||
|
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
|
||||||
|
Config::setNeoMode(ui->ps4proCheckBox->isChecked());
|
||||||
|
Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked());
|
||||||
|
Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
|
||||||
|
Config::setNeoMode(ui->ps4proCheckBox->isChecked());
|
||||||
|
Config::setLogType(ui->logTypeComboBox->currentText().toStdString());
|
||||||
|
Config::setLogFilter(ui->logFilterLineEdit->text().toStdString());
|
||||||
|
Config::setUserName(ui->userNameLineEdit->text().toStdString());
|
||||||
|
Config::setCursorState(ui->hideCursorComboBox->currentIndex());
|
||||||
|
Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value());
|
||||||
|
Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1);
|
||||||
|
Config::setBGMvolume(ui->BGMVolumeSlider->value());
|
||||||
|
Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]);
|
||||||
|
Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked());
|
||||||
|
Config::setScreenWidth(ui->widthSpinBox->value());
|
||||||
|
Config::setScreenHeight(ui->heightSpinBox->value());
|
||||||
|
Config::setVblankDiv(ui->vblankSpinBox->value());
|
||||||
|
Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked());
|
||||||
|
Config::setNullGpu(ui->nullGpuCheckBox->isChecked());
|
||||||
|
Config::setSeparateUpdateEnabled(ui->separateUpdatesCheckBox->isChecked());
|
||||||
|
Config::setShowSplash(ui->showSplashCheckBox->isChecked());
|
||||||
|
Config::setDebugDump(ui->debugDump->isChecked());
|
||||||
|
Config::setVkValidation(ui->vkValidationCheckBox->isChecked());
|
||||||
|
Config::setVkSyncValidation(ui->vkSyncValidationCheckBox->isChecked());
|
||||||
|
Config::setRdocEnabled(ui->rdocCheckBox->isChecked());
|
||||||
|
Config::setAutoUpdate(ui->updateCheckBox->isChecked());
|
||||||
|
Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString());
|
||||||
|
|
||||||
|
#ifdef ENABLE_DISCORD_RPC
|
||||||
|
auto* rpc = Common::Singleton<DiscordRPCHandler::RPC>::Instance();
|
||||||
|
if (Config::getEnableDiscordRPC()) {
|
||||||
|
rpc->init();
|
||||||
|
rpc->setStatusIdling();
|
||||||
|
} else {
|
||||||
|
rpc->shutdown();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
BackgroundMusicPlayer::getInstance().setVolume(ui->BGMVolumeSlider->value());
|
||||||
|
}
|
||||||
|
|
||||||
|
void SettingsDialog::ResetInstallFolders() {
|
||||||
|
|
||||||
|
std::filesystem::path userdir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||||
|
const toml::value data = toml::parse(userdir / "config.toml");
|
||||||
|
|
||||||
|
if (data.contains("GUI")) {
|
||||||
|
const toml::value& gui = data.at("GUI");
|
||||||
|
const auto install_dir_array =
|
||||||
|
toml::find_or<std::vector<std::string>>(gui, "installDirs", {});
|
||||||
|
std::vector<std::filesystem::path> settings_install_dirs_config = {};
|
||||||
|
|
||||||
|
for (const auto& dir : install_dir_array) {
|
||||||
|
if (std::find(settings_install_dirs_config.begin(), settings_install_dirs_config.end(),
|
||||||
|
dir) == settings_install_dirs_config.end()) {
|
||||||
|
settings_install_dirs_config.push_back(dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& dir : settings_install_dirs_config) {
|
||||||
|
QString path_string;
|
||||||
|
Common::FS::PathToQString(path_string, dir);
|
||||||
|
QListWidgetItem* item = new QListWidgetItem(path_string);
|
||||||
|
ui->gameFoldersListWidget->addItem(item);
|
||||||
|
}
|
||||||
|
Config::setGameInstallDirs(settings_install_dirs_config);
|
||||||
|
}
|
||||||
|
}
|
@ -31,6 +31,8 @@ signals:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void LoadValuesFromConfig();
|
void LoadValuesFromConfig();
|
||||||
|
void UpdateSettings();
|
||||||
|
void ResetInstallFolders();
|
||||||
void InitializeEmulatorLanguages();
|
void InitializeEmulatorLanguages();
|
||||||
void OnLanguageChanged(int index);
|
void OnLanguageChanged(int index);
|
||||||
void OnCursorStateChanged(s16 index);
|
void OnCursorStateChanged(s16 index);
|
||||||
|
@ -1159,7 +1159,7 @@
|
|||||||
<message>
|
<message>
|
||||||
<location filename="../settings_dialog.cpp" line="293"/>
|
<location filename="../settings_dialog.cpp" line="293"/>
|
||||||
<source>separateUpdatesCheckBox</source>
|
<source>separateUpdatesCheckBox</source>
|
||||||
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.</translation>
|
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID.</translation>
|
||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<location filename="../settings_dialog.cpp" line="295"/>
|
<location filename="../settings_dialog.cpp" line="295"/>
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <SDL3/SDL_events.h>
|
#include <SDL3/SDL_events.h>
|
||||||
|
#include <SDL3/SDL_hints.h>
|
||||||
#include <SDL3/SDL_init.h>
|
#include <SDL3/SDL_init.h>
|
||||||
#include <SDL3/SDL_properties.h>
|
#include <SDL3/SDL_properties.h>
|
||||||
#include <SDL3/SDL_timer.h>
|
#include <SDL3/SDL_timer.h>
|
||||||
@ -68,6 +69,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3
|
|||||||
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_,
|
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_,
|
||||||
std::string_view window_title)
|
std::string_view window_title)
|
||||||
: width{width_}, height{height_}, controller{controller_} {
|
: width{width_}, height{height_}, controller{controller_} {
|
||||||
|
if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) {
|
||||||
|
UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError());
|
||||||
|
}
|
||||||
if (!SDL_Init(SDL_INIT_VIDEO)) {
|
if (!SDL_Init(SDL_INIT_VIDEO)) {
|
||||||
UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError());
|
UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError());
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -13,6 +12,7 @@
|
|||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/types.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
|
|||||||
return arg.VectorReg();
|
return arg.VectorReg();
|
||||||
} else if constexpr (std::is_same_v<ArgType, const char*>) {
|
} else if constexpr (std::is_same_v<ArgType, const char*>) {
|
||||||
return arg.StringLiteral();
|
return arg.StringLiteral();
|
||||||
|
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
|
||||||
|
return arg.Patch();
|
||||||
}
|
}
|
||||||
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <auto func, bool is_first_arg_inst, size_t... I>
|
template <auto func, bool is_first_arg_inst, size_t... I>
|
||||||
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
|
|||||||
return main;
|
return main;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps an AmdGpu::TessellationType to the matching spv::ExecutionMode
// (Isoline -> Isolines, Triangle -> Triangles, Quad -> Quads).
// Any other value ends in UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
    if (primitive == AmdGpu::TessellationType::Isoline) {
        return spv::ExecutionMode::Isolines;
    }
    if (primitive == AmdGpu::TessellationType::Triangle) {
        return spv::ExecutionMode::Triangles;
    }
    if (primitive == AmdGpu::TessellationType::Quad) {
        return spv::ExecutionMode::Quads;
    }
    UNREACHABLE_MSG("Tessellation primitive {}", primitive);
}
|
||||||
|
|
||||||
|
// Maps an AmdGpu::TessellationPartitioning to the matching spacing spv::ExecutionMode
// (Integer -> SpacingEqual, FracOdd -> SpacingFractionalOdd, FracEven -> SpacingFractionalEven).
// Any other value ends in UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
    if (spacing == AmdGpu::TessellationPartitioning::Integer) {
        return spv::ExecutionMode::SpacingEqual;
    }
    if (spacing == AmdGpu::TessellationPartitioning::FracOdd) {
        return spv::ExecutionMode::SpacingFractionalOdd;
    }
    if (spacing == AmdGpu::TessellationPartitioning::FracEven) {
        return spv::ExecutionMode::SpacingFractionalEven;
    }
    UNREACHABLE_MSG("Tessellation spacing {}", spacing);
}
|
||||||
|
|
||||||
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
|
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
|
||||||
ctx.AddCapability(spv::Capability::Image1D);
|
ctx.AddCapability(spv::Capability::Image1D);
|
||||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||||
@ -222,6 +251,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||||
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||||
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||||
|
if (profile.supports_image_load_store_lod) {
|
||||||
|
ctx.AddExtension("SPV_AMD_shader_image_load_store_lod");
|
||||||
|
ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (info.has_texel_buffers) {
|
if (info.has_texel_buffers) {
|
||||||
ctx.AddCapability(spv::Capability::SampledBuffer);
|
ctx.AddCapability(spv::Capability::SampledBuffer);
|
||||||
@ -244,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||||||
if (info.uses_group_ballot) {
|
if (info.uses_group_ballot) {
|
||||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||||
}
|
}
|
||||||
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
|
const auto stage = info.l_stage;
|
||||||
|
if (stage == LogicalStage::Vertex) {
|
||||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||||
}
|
}
|
||||||
if (info.stage == Stage::Geometry) {
|
if (stage == LogicalStage::Geometry) {
|
||||||
ctx.AddCapability(spv::Capability::Geometry);
|
ctx.AddCapability(spv::Capability::Geometry);
|
||||||
}
|
}
|
||||||
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
|
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
|
||||||
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
||||||
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
||||||
}
|
}
|
||||||
|
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
|
||||||
|
ctx.AddCapability(spv::Capability::Tessellation);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
||||||
const auto& info = program.info;
|
|
||||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||||
spv::ExecutionModel execution_model{};
|
spv::ExecutionModel execution_model{};
|
||||||
switch (program.info.stage) {
|
switch (info.l_stage) {
|
||||||
case Stage::Compute: {
|
case LogicalStage::Compute: {
|
||||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||||
execution_model = spv::ExecutionModel::GLCompute;
|
execution_model = spv::ExecutionModel::GLCompute;
|
||||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||||
workgroup_size[1], workgroup_size[2]);
|
workgroup_size[1], workgroup_size[2]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Export:
|
case LogicalStage::Vertex:
|
||||||
case Stage::Vertex:
|
|
||||||
execution_model = spv::ExecutionModel::Vertex;
|
execution_model = spv::ExecutionModel::Vertex;
|
||||||
break;
|
break;
|
||||||
case Stage::Fragment:
|
case LogicalStage::TessellationControl:
|
||||||
|
execution_model = spv::ExecutionModel::TessellationControl;
|
||||||
|
ctx.AddCapability(spv::Capability::Tessellation);
|
||||||
|
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||||
|
ctx.runtime_info.hs_info.NumOutputControlPoints());
|
||||||
|
break;
|
||||||
|
case LogicalStage::TessellationEval: {
|
||||||
|
execution_model = spv::ExecutionModel::TessellationEvaluation;
|
||||||
|
const auto& vs_info = ctx.runtime_info.vs_info;
|
||||||
|
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
|
||||||
|
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
|
||||||
|
ctx.AddExecutionMode(main,
|
||||||
|
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
|
||||||
|
? spv::ExecutionMode::VertexOrderCcw
|
||||||
|
: spv::ExecutionMode::VertexOrderCw);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::Fragment:
|
||||||
execution_model = spv::ExecutionModel::Fragment;
|
execution_model = spv::ExecutionModel::Fragment;
|
||||||
if (ctx.profile.lower_left_origin_mode) {
|
if (ctx.profile.lower_left_origin_mode) {
|
||||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
|
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
|
||||||
@ -288,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Geometry:
|
case LogicalStage::Geometry:
|
||||||
execution_model = spv::ExecutionModel::Geometry;
|
execution_model = spv::ExecutionModel::Geometry;
|
||||||
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
|
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
|
||||||
ctx.AddExecutionMode(main,
|
ctx.AddExecutionMode(main,
|
||||||
@ -299,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||||||
ctx.runtime_info.gs_info.num_invocations);
|
ctx.runtime_info.gs_info.num_invocations);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
UNREACHABLE_MSG("Stage {}", u32(info.stage));
|
||||||
}
|
}
|
||||||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||||
}
|
}
|
||||||
@ -345,7 +397,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
|
|||||||
const IR::Program& program, Bindings& binding) {
|
const IR::Program& program, Bindings& binding) {
|
||||||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||||
const Id main{DefineMain(ctx, program)};
|
const Id main{DefineMain(ctx, program)};
|
||||||
DefineEntryPoint(program, ctx, main);
|
DefineEntryPoint(program.info, ctx, main);
|
||||||
SetupCapabilities(program.info, profile, ctx);
|
SetupCapabilities(program.info, profile, ctx);
|
||||||
SetupFloatMode(ctx, profile, runtime_info, main);
|
SetupFloatMode(ctx, profile, runtime_info, main);
|
||||||
PatchPhiNodes(program, ctx);
|
PatchPhiNodes(program, ctx);
|
||||||
|
@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
|
|||||||
|
|
||||||
void EmitBarrier(EmitContext& ctx) {
|
void EmitBarrier(EmitContext& ctx) {
|
||||||
const auto execution{spv::Scope::Workgroup};
|
const auto execution{spv::Scope::Workgroup};
|
||||||
const auto memory{spv::Scope::Workgroup};
|
spv::Scope memory;
|
||||||
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
|
spv::MemorySemanticsMask memory_semantics;
|
||||||
spv::MemorySemanticsMask::WorkgroupMemory};
|
if (ctx.l_stage == Shader::LogicalStage::TessellationControl) {
|
||||||
|
memory = spv::Scope::Invocation;
|
||||||
|
memory_semantics = spv::MemorySemanticsMask::MaskNone;
|
||||||
|
} else {
|
||||||
|
memory = spv::Scope::Workgroup;
|
||||||
|
memory_semantics =
|
||||||
|
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory;
|
||||||
|
}
|
||||||
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
|
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
|
||||||
ctx.ConstU32(static_cast<u32>(memory)),
|
ctx.ConstU32(static_cast<u32>(memory)),
|
||||||
ctx.ConstU32(static_cast<u32>(memory_semantics)));
|
ctx.ConstU32(static_cast<u32>(memory_semantics)));
|
||||||
|
@ -4,6 +4,9 @@
|
|||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
|
#include "shader_recompiler/ir/patch.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
#include <magic_enum/magic_enum.hpp>
|
#include <magic_enum/magic_enum.hpp>
|
||||||
|
|
||||||
@ -45,8 +48,13 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||||||
|
|
||||||
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto& info{ctx.output_params.at(index)};
|
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
||||||
|
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
|
||||||
|
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
|
||||||
|
ctx.ConstU32(element));
|
||||||
|
} else {
|
||||||
|
const auto& info{ctx.output_params.at(attr_index)};
|
||||||
ASSERT(info.num_components > 0);
|
ASSERT(info.num_components > 0);
|
||||||
if (info.num_components == 1) {
|
if (info.num_components == 1) {
|
||||||
return info.id;
|
return info.id;
|
||||||
@ -54,6 +62,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||||||
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (IR::IsMrt(attr)) {
|
if (IR::IsMrt(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
||||||
const auto& info{ctx.frag_outputs.at(index)};
|
const auto& info{ctx.frag_outputs.at(index)};
|
||||||
@ -82,10 +91,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||||||
|
|
||||||
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
|
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
||||||
|
return {ctx.F32[1], false};
|
||||||
|
} else {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto& info{ctx.output_params.at(index)};
|
const auto& info{ctx.output_params.at(index)};
|
||||||
return {info.component_type, info.is_integer};
|
return {info.component_type, info.is_integer};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (IR::IsMrt(attr)) {
|
if (IR::IsMrt(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
||||||
const auto& info{ctx.frag_outputs.at(index)};
|
const auto& info{ctx.frag_outputs.at(index)};
|
||||||
@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
|||||||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||||
if (IR::IsPosition(attr)) {
|
if (IR::IsPosition(attr)) {
|
||||||
ASSERT(attr == IR::Attribute::Position0);
|
ASSERT(attr == IR::Attribute::Position0);
|
||||||
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||||
const auto pointer{
|
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
|
||||||
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
|
|
||||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||||
@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
|
|||||||
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto param = ctx.input_params.at(param_id).id;
|
const auto param = ctx.input_params.at(param_id).id;
|
||||||
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
|
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
|
||||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||||
@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
|
|||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||||
if (ctx.info.stage == Stage::Geometry) {
|
if (ctx.info.l_stage == LogicalStage::Geometry) {
|
||||||
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
|
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
|
||||||
|
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
|
||||||
|
ctx.info.l_stage == LogicalStage::TessellationEval) {
|
||||||
|
if (IR::IsTessCoord(attr)) {
|
||||||
|
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
|
||||||
|
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||||
|
const auto pointer{
|
||||||
|
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
|
||||||
|
return ctx.OpLoad(ctx.F32[1], pointer);
|
||||||
|
} else if (IR::IsParam(attr)) {
|
||||||
|
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
|
const auto param = ctx.input_params.at(param_id).id;
|
||||||
|
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||||
|
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
|
||||||
|
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||||
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
|
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
|||||||
}
|
}
|
||||||
return coord;
|
return coord;
|
||||||
}
|
}
|
||||||
|
case IR::Attribute::TessellationEvaluationPointU:
|
||||||
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
|
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
|
||||||
|
case IR::Attribute::TessellationEvaluationPointV:
|
||||||
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
|
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Read attribute {}", attr);
|
UNREACHABLE_MSG("Read attribute {}", attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||||||
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
|
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
|
||||||
ctx.u32_zero_value);
|
ctx.u32_zero_value);
|
||||||
case IR::Attribute::PrimitiveId:
|
case IR::Attribute::PrimitiveId:
|
||||||
ASSERT(ctx.info.stage == Stage::Geometry);
|
|
||||||
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
||||||
|
case IR::Attribute::InvocationId:
|
||||||
|
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
|
||||||
|
ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||||
|
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
|
||||||
|
case IR::Attribute::PatchVertices:
|
||||||
|
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||||
|
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
|
||||||
|
case IR::Attribute::PackedHullInvocationInfo: {
|
||||||
|
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||||
|
// [0:8]: patch id within VGT
|
||||||
|
// [8:12]: output control point id
|
||||||
|
// But 0:8 should be treated as 0 for attribute addressing purposes
|
||||||
|
if (ctx.runtime_info.hs_info.IsPassthrough()) {
|
||||||
|
// Gcn shader would run with 1 thread, but we need to run a thread for
|
||||||
|
// each output control point.
|
||||||
|
// If Gcn shader uses this value, we should make sure all threads in the
|
||||||
|
// Vulkan shader use 0
|
||||||
|
return ctx.ConstU32(0u);
|
||||||
|
} else {
|
||||||
|
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
|
||||||
|
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
|
||||||
|
}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
UNREACHABLE_MSG("Read U32 attribute {}", attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
|
||||||
|
const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||||
|
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
|
||||||
|
vertex_index, attr_index, comp_index));
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
|
||||||
|
// Implied vertex index is invocation_id
|
||||||
|
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
|
||||||
|
Id pointer =
|
||||||
|
ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
|
||||||
|
ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
|
||||||
|
const u32 index{IR::GenericPatchIndex(patch)};
|
||||||
|
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
|
||||||
|
const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
|
||||||
|
: ctx.input_f32};
|
||||||
|
const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
|
||||||
|
return ctx.OpLoad(ctx.F32[1], pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
|
||||||
|
const Id pointer{[&] {
|
||||||
|
if (IR::IsGeneric(patch)) {
|
||||||
|
const u32 index{IR::GenericPatchIndex(patch)};
|
||||||
|
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
|
||||||
|
return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
|
||||||
|
}
|
||||||
|
switch (patch) {
|
||||||
|
case IR::Patch::TessellationLodLeft:
|
||||||
|
case IR::Patch::TessellationLodRight:
|
||||||
|
case IR::Patch::TessellationLodTop:
|
||||||
|
case IR::Patch::TessellationLodBottom: {
|
||||||
|
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
|
||||||
|
const Id index_id{ctx.ConstU32(index)};
|
||||||
|
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
|
||||||
|
}
|
||||||
|
case IR::Patch::TessellationLodInteriorU:
|
||||||
|
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
|
||||||
|
ctx.u32_zero_value);
|
||||||
|
case IR::Patch::TessellationLodInteriorV:
|
||||||
|
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Patch {}", u32(patch));
|
||||||
|
}
|
||||||
|
}()};
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
}
|
||||||
|
|
||||||
template <u32 N>
|
template <u32 N>
|
||||||
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
|
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||||
auto& buffer = ctx.buffers[handle];
|
auto& buffer = ctx.buffers[handle];
|
||||||
|
@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
|||||||
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
|
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||||
|
return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||||
|
return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b));
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitFPNeg16(EmitContext& ctx, Id value) {
|
Id EmitFPNeg16(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpFNegate(ctx.F16[1], value);
|
return ctx.OpFNegate(ctx.F16[1], value);
|
||||||
}
|
}
|
||||||
@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
|
|||||||
return ctx.OpTrunc(ctx.F64[1], value);
|
return ctx.OpTrunc(ctx.F64[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitFPFract(EmitContext& ctx, Id value) {
|
Id EmitFPFract32(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpFract(ctx.F32[1], value);
|
return ctx.OpFract(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitFPFract64(EmitContext& ctx, Id value) {
|
||||||
|
return ctx.OpFract(ctx.F64[1], value);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitFPFrexpSig32(EmitContext& ctx, Id value) {
|
||||||
|
const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
|
||||||
|
return ctx.OpCompositeExtract(ctx.F32[1], frexp, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitFPFrexpSig64(EmitContext& ctx, Id value) {
|
||||||
|
const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
|
||||||
|
return ctx.OpCompositeExtract(ctx.F64[1], frexp, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitFPFrexpExp32(EmitContext& ctx, Id value) {
|
||||||
|
const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
|
||||||
|
return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id EmitFPFrexpExp64(EmitContext& ctx, Id value) {
|
||||||
|
const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
|
||||||
|
return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
|
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
|
||||||
return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs);
|
return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs);
|
||||||
}
|
}
|
||||||
|
@ -130,8 +130,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
|||||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
ImageOperands operands;
|
ImageOperands operands;
|
||||||
operands.AddOffset(ctx, offset);
|
|
||||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||||
|
operands.AddOffset(ctx, offset);
|
||||||
const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
|
const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
|
||||||
operands.mask, operands.operands);
|
operands.mask, operands.operands);
|
||||||
const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
||||||
@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels;
|
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels;
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||||
Id lod, Id ms) {
|
const IR::Value& offset, Id ms) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
const Id result_type = texture.data_types->Get(4);
|
const Id result_type = texture.data_types->Get(4);
|
||||||
@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
|
|||||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample;
|
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample;
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) {
|
||||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
const Id color_type = texture.data_types->Get(4);
|
const Id color_type = texture.data_types->Get(4);
|
||||||
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
|
ImageOperands operands;
|
||||||
|
if (ctx.profile.supports_image_load_store_lod) {
|
||||||
|
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||||
|
} else if (Sirit::ValidId(lod)) {
|
||||||
|
LOG_WARNING(Render, "Image write with LOD not supported by driver");
|
||||||
|
}
|
||||||
|
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask,
|
||||||
|
operands.operands);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
enum class Attribute : u64;
|
enum class Attribute : u64;
|
||||||
enum class ScalarReg : u32;
|
enum class ScalarReg : u32;
|
||||||
|
enum class Patch : u64;
|
||||||
class Inst;
|
class Inst;
|
||||||
class Value;
|
class Value;
|
||||||
} // namespace Shader::IR
|
} // namespace Shader::IR
|
||||||
@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
|
|||||||
void EmitReference(EmitContext&);
|
void EmitReference(EmitContext&);
|
||||||
void EmitPhiMove(EmitContext&);
|
void EmitPhiMove(EmitContext&);
|
||||||
void EmitJoin(EmitContext& ctx);
|
void EmitJoin(EmitContext& ctx);
|
||||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
|
||||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
|
||||||
void EmitGetScc(EmitContext& ctx);
|
void EmitGetScc(EmitContext& ctx);
|
||||||
void EmitGetExec(EmitContext& ctx);
|
void EmitGetExec(EmitContext& ctx);
|
||||||
void EmitGetVcc(EmitContext& ctx);
|
void EmitGetVcc(EmitContext& ctx);
|
||||||
@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
|||||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
||||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||||
|
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
|
||||||
|
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
|
||||||
|
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
|
||||||
|
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
|
||||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
||||||
void EmitSetSampleMask(EmitContext& ctx, Id value);
|
void EmitSetSampleMask(EmitContext& ctx, Id value);
|
||||||
void EmitSetFragDepth(EmitContext& ctx, Id value);
|
void EmitSetFragDepth(EmitContext& ctx, Id value);
|
||||||
@ -189,6 +192,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
|
|||||||
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
|
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
|
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||||
Id EmitFPNeg16(EmitContext& ctx, Id value);
|
Id EmitFPNeg16(EmitContext& ctx, Id value);
|
||||||
Id EmitFPNeg32(EmitContext& ctx, Id value);
|
Id EmitFPNeg32(EmitContext& ctx, Id value);
|
||||||
Id EmitFPNeg64(EmitContext& ctx, Id value);
|
Id EmitFPNeg64(EmitContext& ctx, Id value);
|
||||||
@ -220,7 +225,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
|
|||||||
Id EmitFPTrunc16(EmitContext& ctx, Id value);
|
Id EmitFPTrunc16(EmitContext& ctx, Id value);
|
||||||
Id EmitFPTrunc32(EmitContext& ctx, Id value);
|
Id EmitFPTrunc32(EmitContext& ctx, Id value);
|
||||||
Id EmitFPTrunc64(EmitContext& ctx, Id value);
|
Id EmitFPTrunc64(EmitContext& ctx, Id value);
|
||||||
Id EmitFPFract(EmitContext& ctx, Id value);
|
Id EmitFPFract32(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPFract64(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
|
||||||
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
|
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
|
||||||
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
|
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
|
||||||
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
|
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
|
||||||
@ -385,14 +395,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||||||
const IR::Value& offset);
|
const IR::Value& offset);
|
||||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
const IR::Value& offset, Id dref);
|
const IR::Value& offset, Id dref);
|
||||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||||
Id lod, Id ms);
|
const IR::Value& offset, Id ms);
|
||||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
|
||||||
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
|
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color);
|
||||||
|
|
||||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||||
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||||
#include "shader_recompiler/ir/passes/srt.h"
|
#include "shader_recompiler/ir/passes/srt.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/types.h"
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) {
|
|||||||
case Stage::Compute:
|
case Stage::Compute:
|
||||||
return "cs";
|
return "cs";
|
||||||
}
|
}
|
||||||
throw InvalidArgument("Invalid stage {}", u32(stage));
|
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
|
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
|
||||||
@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
|||||||
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||||
const Info& info_, Bindings& binding_)
|
const Info& info_, Bindings& binding_)
|
||||||
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
||||||
profile{profile_}, stage{info.stage}, binding{binding_} {
|
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
|
||||||
AddCapability(spv::Capability::Shader);
|
AddCapability(spv::Capability::Shader);
|
||||||
DefineArithmeticTypes();
|
DefineArithmeticTypes();
|
||||||
DefineInterfaces();
|
DefineInterfaces();
|
||||||
@ -147,6 +148,10 @@ void EmitContext::DefineArithmeticTypes() {
|
|||||||
|
|
||||||
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
||||||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||||
|
frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32");
|
||||||
|
if (info.uses_fp64) {
|
||||||
|
frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineInterfaces() {
|
void EmitContext::DefineInterfaces() {
|
||||||
@ -264,9 +269,8 @@ void EmitContext::DefineInputs() {
|
|||||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||||
}
|
}
|
||||||
switch (stage) {
|
switch (l_stage) {
|
||||||
case Stage::Export:
|
case LogicalStage::Vertex: {
|
||||||
case Stage::Vertex: {
|
|
||||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||||
@ -290,7 +294,7 @@ void EmitContext::DefineInputs() {
|
|||||||
});
|
});
|
||||||
// Note that we pass index rather than Id
|
// Note that we pass index rather than Id
|
||||||
input_params[attrib.semantic] = SpirvAttribute{
|
input_params[attrib.semantic] = SpirvAttribute{
|
||||||
.id = rate_idx,
|
.id = {rate_idx},
|
||||||
.pointer_type = input_u32,
|
.pointer_type = input_u32,
|
||||||
.component_type = U32[1],
|
.component_type = U32[1],
|
||||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||||
@ -307,12 +311,11 @@ void EmitContext::DefineInputs() {
|
|||||||
}
|
}
|
||||||
input_params[attrib.semantic] =
|
input_params[attrib.semantic] =
|
||||||
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
||||||
interfaces.push_back(id);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Fragment:
|
case LogicalStage::Fragment:
|
||||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||||
@ -347,15 +350,14 @@ void EmitContext::DefineInputs() {
|
|||||||
}
|
}
|
||||||
input_params[semantic] =
|
input_params[semantic] =
|
||||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
||||||
interfaces.push_back(attr_id);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Compute:
|
case LogicalStage::Compute:
|
||||||
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||||
local_invocation_id =
|
local_invocation_id =
|
||||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||||
break;
|
break;
|
||||||
case Stage::Geometry: {
|
case LogicalStage::Geometry: {
|
||||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||||
const auto gl_per_vertex =
|
const auto gl_per_vertex =
|
||||||
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
||||||
@ -379,9 +381,50 @@ void EmitContext::DefineInputs() {
|
|||||||
for (int param_id = 0; param_id < num_params; ++param_id) {
|
for (int param_id = 0; param_id < num_params; ++param_id) {
|
||||||
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
||||||
const Id id{DefineInput(type, param_id)};
|
const Id id{DefineInput(type, param_id)};
|
||||||
Name(id, fmt::format("in_attr{}", param_id));
|
Name(id, fmt::format("gs_in_attr{}", param_id));
|
||||||
input_params[param_id] = {id, input_f32, F32[1], 4};
|
input_params[param_id] = {id, input_f32, F32[1], 4};
|
||||||
interfaces.push_back(id);
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::TessellationControl: {
|
||||||
|
invocation_id =
|
||||||
|
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
|
||||||
|
patch_vertices =
|
||||||
|
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
|
||||||
|
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||||
|
|
||||||
|
const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4;
|
||||||
|
if (num_attrs > 0) {
|
||||||
|
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
|
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||||
|
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||||
|
input_attr_array = DefineInput(patch_array_type, 0);
|
||||||
|
Name(input_attr_array, "in_attrs");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::TessellationEval: {
|
||||||
|
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
|
||||||
|
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||||
|
|
||||||
|
const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4;
|
||||||
|
if (num_attrs > 0) {
|
||||||
|
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
|
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||||
|
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||||
|
input_attr_array = DefineInput(patch_array_type, 0);
|
||||||
|
Name(input_attr_array, "in_attrs");
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
|
||||||
|
for (size_t index = 0; index < 30; ++index) {
|
||||||
|
if (!(info.uses_patches & (1U << index))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const Id id{DefineInput(F32[4], patch_base_location + index)};
|
||||||
|
Decorate(id, spv::Decoration::Patch);
|
||||||
|
Name(id, fmt::format("patch_in{}", index));
|
||||||
|
patches[index] = id;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -391,9 +434,81 @@ void EmitContext::DefineInputs() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineOutputs() {
|
void EmitContext::DefineOutputs() {
|
||||||
switch (stage) {
|
switch (l_stage) {
|
||||||
case Stage::Export:
|
case LogicalStage::Vertex: {
|
||||||
case Stage::Vertex: {
|
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
|
||||||
|
// Might cause problems linking with tcs
|
||||||
|
|
||||||
|
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
|
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||||
|
info.stores.Get(IR::Attribute::Position2) ||
|
||||||
|
info.stores.Get(IR::Attribute::Position3);
|
||||||
|
if (has_extra_pos_stores) {
|
||||||
|
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||||
|
clip_distances =
|
||||||
|
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
||||||
|
cull_distances =
|
||||||
|
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
||||||
|
}
|
||||||
|
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
|
||||||
|
const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4;
|
||||||
|
if (num_attrs > 0) {
|
||||||
|
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
|
output_attr_array = DefineOutput(type, 0);
|
||||||
|
Name(output_attr_array, "out_attrs");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||||
|
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||||
|
if (!info.stores.GetAny(param)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const u32 num_components = info.stores.NumComponents(param);
|
||||||
|
const Id id{DefineOutput(F32[num_components], i)};
|
||||||
|
Name(id, fmt::format("out_attr{}", i));
|
||||||
|
output_params[i] =
|
||||||
|
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::TessellationControl: {
|
||||||
|
if (info.stores_tess_level_outer) {
|
||||||
|
const Id type{TypeArray(F32[1], ConstU32(4U))};
|
||||||
|
output_tess_level_outer =
|
||||||
|
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
|
||||||
|
Decorate(output_tess_level_outer, spv::Decoration::Patch);
|
||||||
|
}
|
||||||
|
if (info.stores_tess_level_inner) {
|
||||||
|
const Id type{TypeArray(F32[1], ConstU32(2U))};
|
||||||
|
output_tess_level_inner =
|
||||||
|
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
|
||||||
|
Decorate(output_tess_level_inner, spv::Decoration::Patch);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4;
|
||||||
|
if (num_attrs > 0) {
|
||||||
|
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
|
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||||
|
const Id patch_array_type{TypeArray(
|
||||||
|
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
|
||||||
|
output_attr_array = DefineOutput(patch_array_type, 0);
|
||||||
|
Name(output_attr_array, "out_attrs");
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
|
||||||
|
for (size_t index = 0; index < 30; ++index) {
|
||||||
|
if (!(info.uses_patches & (1U << index))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const Id id{DefineOutput(F32[4], patch_base_location + index)};
|
||||||
|
Decorate(id, spv::Decoration::Patch);
|
||||||
|
Name(id, fmt::format("patch_out{}", index));
|
||||||
|
patches[index] = id;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::TessellationEval: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||||
info.stores.Get(IR::Attribute::Position2) ||
|
info.stores.Get(IR::Attribute::Position2) ||
|
||||||
@ -415,11 +530,10 @@ void EmitContext::DefineOutputs() {
|
|||||||
Name(id, fmt::format("out_attr{}", i));
|
Name(id, fmt::format("out_attr{}", i));
|
||||||
output_params[i] =
|
output_params[i] =
|
||||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
||||||
interfaces.push_back(id);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Fragment:
|
case LogicalStage::Fragment:
|
||||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||||
if (!info.stores.GetAny(mrt)) {
|
if (!info.stores.GetAny(mrt)) {
|
||||||
@ -431,22 +545,22 @@ void EmitContext::DefineOutputs() {
|
|||||||
const Id id{DefineOutput(type, i)};
|
const Id id{DefineOutput(type, i)};
|
||||||
Name(id, fmt::format("frag_color{}", i));
|
Name(id, fmt::format("frag_color{}", i));
|
||||||
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
||||||
interfaces.push_back(id);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Geometry: {
|
case LogicalStage::Geometry: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
|
|
||||||
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||||
const Id id{DefineOutput(F32[4], attr_id)};
|
const Id id{DefineOutput(F32[4], attr_id)};
|
||||||
Name(id, fmt::format("out_attr{}", attr_id));
|
Name(id, fmt::format("out_attr{}", attr_id));
|
||||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
||||||
interfaces.push_back(id);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case LogicalStage::Compute:
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -582,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||||
return spv::ImageFormat::R32ui;
|
return spv::ImageFormat::R32ui;
|
||||||
}
|
}
|
||||||
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||||
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) {
|
||||||
|
return spv::ImageFormat::R32i;
|
||||||
|
}
|
||||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||||
return spv::ImageFormat::R32f;
|
return spv::ImageFormat::R32f;
|
||||||
|
@ -46,14 +46,18 @@ public:
|
|||||||
void DefineBufferOffsets();
|
void DefineBufferOffsets();
|
||||||
void DefineInterpolatedAttribs();
|
void DefineInterpolatedAttribs();
|
||||||
|
|
||||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
||||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||||
Decorate(input_id, spv::Decoration::Location, location);
|
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
|
||||||
|
if (location) {
|
||||||
|
Decorate(input_id, spv::Decoration::Location, *location);
|
||||||
|
}
|
||||||
return input_id;
|
return input_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
|
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
|
||||||
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
|
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||||
|
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
|
||||||
if (location) {
|
if (location) {
|
||||||
Decorate(output_id, spv::Decoration::Location, *location);
|
Decorate(output_id, spv::Decoration::Location, *location);
|
||||||
}
|
}
|
||||||
@ -131,7 +135,8 @@ public:
|
|||||||
const Info& info;
|
const Info& info;
|
||||||
const RuntimeInfo& runtime_info;
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
Stage stage{};
|
Stage stage;
|
||||||
|
LogicalStage l_stage{};
|
||||||
|
|
||||||
Id void_id{};
|
Id void_id{};
|
||||||
Id U8{};
|
Id U8{};
|
||||||
@ -148,6 +153,8 @@ public:
|
|||||||
|
|
||||||
Id full_result_i32x2;
|
Id full_result_i32x2;
|
||||||
Id full_result_u32x2;
|
Id full_result_u32x2;
|
||||||
|
Id frexp_result_f32;
|
||||||
|
Id frexp_result_f64;
|
||||||
|
|
||||||
Id pi_x2;
|
Id pi_x2;
|
||||||
|
|
||||||
@ -186,8 +193,15 @@ public:
|
|||||||
Id clip_distances{};
|
Id clip_distances{};
|
||||||
Id cull_distances{};
|
Id cull_distances{};
|
||||||
|
|
||||||
|
Id patch_vertices{};
|
||||||
|
Id output_tess_level_outer{};
|
||||||
|
Id output_tess_level_inner{};
|
||||||
|
Id tess_coord;
|
||||||
|
std::array<Id, 30> patches{};
|
||||||
|
|
||||||
Id workgroup_id{};
|
Id workgroup_id{};
|
||||||
Id local_invocation_id{};
|
Id local_invocation_id{};
|
||||||
|
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
|
||||||
Id subgroup_local_invocation_id{};
|
Id subgroup_local_invocation_id{};
|
||||||
Id image_u32{};
|
Id image_u32{};
|
||||||
|
|
||||||
@ -250,6 +264,8 @@ public:
|
|||||||
bool is_loaded{};
|
bool is_loaded{};
|
||||||
s32 buffer_handle{-1};
|
s32 buffer_handle{-1};
|
||||||
};
|
};
|
||||||
|
Id input_attr_array;
|
||||||
|
Id output_attr_array;
|
||||||
std::array<SpirvAttribute, IR::NumParams> input_params{};
|
std::array<SpirvAttribute, IR::NumParams> input_params{};
|
||||||
std::array<SpirvAttribute, IR::NumParams> output_params{};
|
std::array<SpirvAttribute, IR::NumParams> output_params{};
|
||||||
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
|
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
|
||||||
|
@ -80,6 +80,8 @@ void CFG::EmitLabels() {
|
|||||||
if (inst.IsUnconditionalBranch()) {
|
if (inst.IsUnconditionalBranch()) {
|
||||||
const u32 target = inst.BranchTarget(pc);
|
const u32 target = inst.BranchTarget(pc);
|
||||||
AddLabel(target);
|
AddLabel(target);
|
||||||
|
// Emit this label so that the block ends with s_branch instruction
|
||||||
|
AddLabel(pc + inst.length);
|
||||||
} else if (inst.IsConditionalBranch()) {
|
} else if (inst.IsConditionalBranch()) {
|
||||||
const u32 true_label = inst.BranchTarget(pc);
|
const u32 true_label = inst.BranchTarget(pc);
|
||||||
const u32 false_label = pc + inst.length;
|
const u32 false_label = pc + inst.length;
|
||||||
|
38
src/shader_recompiler/frontend/tessellation.h
Normal file
38
src/shader_recompiler/frontend/tessellation.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Shader {
|
||||||
|
|
||||||
|
struct TessellationDataConstantBuffer {
|
||||||
|
u32 ls_stride;
|
||||||
|
u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride :
|
||||||
|
// ls_stride
|
||||||
|
u32 num_patches; // num patches submitted in threadgroup
|
||||||
|
u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ?
|
||||||
|
// HullStateConstants::m_numInputCP * ls_stride * num_patches : 0
|
||||||
|
// basically 0 when passthrough
|
||||||
|
u32 patch_const_size; // 16 * num_patch_attrs
|
||||||
|
u32 patch_const_base; // hs_output_base + patch_output_size
|
||||||
|
u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch
|
||||||
|
f32 off_chip_tessellation_factor_threshold;
|
||||||
|
u32 first_edge_tess_factor_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Assign names to dword fields of TessellationDataConstantBuffer
|
||||||
|
enum class TessConstantAttribute : u32 {
|
||||||
|
LsStride,
|
||||||
|
HsCpStride,
|
||||||
|
HsNumPatch,
|
||||||
|
HsOutputBase,
|
||||||
|
PatchConstSize,
|
||||||
|
PatchConstBase,
|
||||||
|
PatchOutputSize,
|
||||||
|
OffChipTessellationFactorThreshold,
|
||||||
|
FirstEdgeTessFactorIndex,
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Shader
|
@ -1,8 +1,8 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||||
const IR::U32 value{GetSrc(inst.src[0])};
|
const IR::U32 value{GetSrc(inst.src[0])};
|
||||||
|
|
||||||
if (info.stage != Stage::Compute) {
|
if (info.l_stage == LogicalStage::Compute ||
|
||||||
SetDst(inst.dst[0], value);
|
info.l_stage == LogicalStage::TessellationControl) {
|
||||||
} else {
|
|
||||||
SetDst(inst.dst[0], ir.ReadFirstLane(value));
|
SetDst(inst.dst[0], ir.ReadFirstLane(value));
|
||||||
|
} else {
|
||||||
|
SetDst(inst.dst[0], value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||||||
|
|
||||||
const auto& exp = inst.control.exp;
|
const auto& exp = inst.control.exp;
|
||||||
const IR::Attribute attrib{exp.target};
|
const IR::Attribute attrib{exp.target};
|
||||||
|
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const std::array vsrc = {
|
const std::array vsrc = {
|
||||||
IR::VectorReg(inst.src[0].code),
|
IR::VectorReg(inst.src[0].code),
|
||||||
IR::VectorReg(inst.src[1].code),
|
IR::VectorReg(inst.src[1].code),
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
return S_BFM_B32(inst);
|
return S_BFM_B32(inst);
|
||||||
case Opcode::S_MUL_I32:
|
case Opcode::S_MUL_I32:
|
||||||
return S_MUL_I32(inst);
|
return S_MUL_I32(inst);
|
||||||
|
case Opcode::S_BFE_I32:
|
||||||
|
return S_BFE(inst, true);
|
||||||
case Opcode::S_BFE_U32:
|
case Opcode::S_BFE_U32:
|
||||||
return S_BFE_U32(inst);
|
return S_BFE(inst, false);
|
||||||
case Opcode::S_ABSDIFF_I32:
|
case Opcode::S_ABSDIFF_I32:
|
||||||
return S_ABSDIFF_I32(inst);
|
return S_ABSDIFF_I32(inst);
|
||||||
|
|
||||||
@ -94,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
break;
|
break;
|
||||||
case Opcode::S_BREV_B32:
|
case Opcode::S_BREV_B32:
|
||||||
return S_BREV_B32(inst);
|
return S_BREV_B32(inst);
|
||||||
case Opcode::S_BCNT1_I32_B64:
|
case Opcode::S_BCNT1_I32_B32:
|
||||||
return S_BCNT1_I32_B64(inst);
|
return S_BCNT1_I32_B32(inst);
|
||||||
case Opcode::S_FF1_I32_B32:
|
case Opcode::S_FF1_I32_B32:
|
||||||
return S_FF1_I32_B32(inst);
|
return S_FF1_I32_B32(inst);
|
||||||
case Opcode::S_AND_SAVEEXEC_B64:
|
case Opcode::S_AND_SAVEEXEC_B64:
|
||||||
@ -157,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) {
|
|||||||
switch (inst.opcode) {
|
switch (inst.opcode) {
|
||||||
// SOPK
|
// SOPK
|
||||||
case Opcode::S_MOVK_I32:
|
case Opcode::S_MOVK_I32:
|
||||||
return S_MOVK(inst);
|
return S_MOVK(inst, false);
|
||||||
|
case Opcode::S_CMOVK_I32:
|
||||||
|
return S_MOVK(inst, true);
|
||||||
case Opcode::S_CMPK_EQ_I32:
|
case Opcode::S_CMPK_EQ_I32:
|
||||||
return S_CMPK(ConditionOp::EQ, true, inst);
|
return S_CMPK(ConditionOp::EQ, true, inst);
|
||||||
case Opcode::S_CMPK_LG_I32:
|
case Opcode::S_CMPK_LG_I32:
|
||||||
@ -434,12 +439,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
|
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
|
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
|
||||||
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
|
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
|
||||||
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
|
const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
|
||||||
SetDst(inst.dst[0], result);
|
SetDst(inst.dst[0], result);
|
||||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
}
|
}
|
||||||
@ -454,13 +459,16 @@ void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
|
|||||||
|
|
||||||
// SOPK
|
// SOPK
|
||||||
|
|
||||||
void Translator::S_MOVK(const GcnInst& inst) {
|
void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) {
|
||||||
const auto simm16 = inst.control.sopk.simm;
|
const s16 simm16 = inst.control.sopk.simm;
|
||||||
if (simm16 & (1 << 15)) {
|
// do the sign extension
|
||||||
// TODO: need to verify the case of imm sign extension
|
const s32 simm32 = static_cast<s32>(simm16);
|
||||||
UNREACHABLE();
|
IR::U32 val = ir.Imm32(simm32);
|
||||||
|
if (is_conditional) {
|
||||||
|
// if !SCC its a NOP
|
||||||
|
val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))};
|
||||||
}
|
}
|
||||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
SetDst(inst.dst[0], val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||||
@ -571,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
|
void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
|
||||||
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
|
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
|
||||||
SetDst(inst.dst[0], result);
|
SetDst(inst.dst[0], result);
|
||||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
@ -594,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in
|
|||||||
return ir.GetVcc();
|
return ir.GetVcc();
|
||||||
case OperandField::ScalarGPR:
|
case OperandField::ScalarGPR:
|
||||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||||
|
case OperandField::ExecLo:
|
||||||
|
return ir.GetExec();
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/types.h"
|
||||||
@ -34,9 +36,8 @@ void Translator::EmitPrologue() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||||
switch (info.stage) {
|
switch (info.l_stage) {
|
||||||
case Stage::Vertex:
|
case LogicalStage::Vertex:
|
||||||
case Stage::Export:
|
|
||||||
// v0: vertex ID, always present
|
// v0: vertex ID, always present
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||||
// v1: instance ID, step rate 0
|
// v1: instance ID, step rate 0
|
||||||
@ -52,7 +53,7 @@ void Translator::EmitPrologue() {
|
|||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Fragment:
|
case LogicalStage::Fragment:
|
||||||
dst_vreg = IR::VectorReg::V0;
|
dst_vreg = IR::VectorReg::V0;
|
||||||
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
||||||
++dst_vreg; // I
|
++dst_vreg; // I
|
||||||
@ -122,7 +123,30 @@ void Translator::EmitPrologue() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Compute:
|
case LogicalStage::TessellationControl: {
|
||||||
|
// Should be laid out like:
|
||||||
|
// [0:8]: patch id within VGT
|
||||||
|
// [8:12]: output control point id
|
||||||
|
ir.SetVectorReg(IR::VectorReg::V1,
|
||||||
|
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
|
||||||
|
// TODO PrimitiveId is probably V2 but haven't seen it yet
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LogicalStage::TessellationEval:
|
||||||
|
ir.SetVectorReg(IR::VectorReg::V0,
|
||||||
|
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
|
||||||
|
ir.SetVectorReg(IR::VectorReg::V1,
|
||||||
|
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
|
||||||
|
// V2 is similar to PrimitiveID but not the same. It seems to only be used in
|
||||||
|
// compiler-generated address calculations. Its probably the patch id within the
|
||||||
|
// patches running locally on a given VGT (or CU, whichever is the granularity of LDS
|
||||||
|
// memory)
|
||||||
|
// Set to 0. See explanation in comment describing hull/domain passes
|
||||||
|
ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
|
||||||
|
// V3 is the actual PrimitiveID as intended by the shader author.
|
||||||
|
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||||
|
break;
|
||||||
|
case LogicalStage::Compute:
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||||
@ -137,7 +161,7 @@ void Translator::EmitPrologue() {
|
|||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Geometry:
|
case LogicalStage::Geometry:
|
||||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||||
@ -152,7 +176,7 @@ void Translator::EmitPrologue() {
|
|||||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Unknown shader stage");
|
UNREACHABLE_MSG("Unknown shader stage");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
|
|||||||
ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
|
ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
|
||||||
return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
|
return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
|
||||||
case OperandField::VccLo:
|
case OperandField::VccLo:
|
||||||
UNREACHABLE();
|
ir.SetVccLo(lo);
|
||||||
|
return ir.SetVccHi(hi);
|
||||||
case OperandField::VccHi:
|
case OperandField::VccHi:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
case OperandField::M0:
|
case OperandField::M0:
|
||||||
@ -503,7 +528,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
|
|||||||
|
|
||||||
// Special case for emitting fetch shader.
|
// Special case for emitting fetch shader.
|
||||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
|
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
|
||||||
|
info.stage == Stage::Local);
|
||||||
translator.EmitFetch(inst);
|
translator.EmitFetch(inst);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -94,12 +94,13 @@ public:
|
|||||||
void S_ASHR_I32(const GcnInst& inst);
|
void S_ASHR_I32(const GcnInst& inst);
|
||||||
void S_BFM_B32(const GcnInst& inst);
|
void S_BFM_B32(const GcnInst& inst);
|
||||||
void S_MUL_I32(const GcnInst& inst);
|
void S_MUL_I32(const GcnInst& inst);
|
||||||
void S_BFE_U32(const GcnInst& inst);
|
void S_BFE(const GcnInst& inst, bool is_signed);
|
||||||
|
void S_BFE_I32(const GcnInst& inst);
|
||||||
void S_ABSDIFF_I32(const GcnInst& inst);
|
void S_ABSDIFF_I32(const GcnInst& inst);
|
||||||
void S_NOT_B32(const GcnInst& inst);
|
void S_NOT_B32(const GcnInst& inst);
|
||||||
|
|
||||||
// SOPK
|
// SOPK
|
||||||
void S_MOVK(const GcnInst& inst);
|
void S_MOVK(const GcnInst& inst, bool is_conditional);
|
||||||
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||||
void S_ADDK_I32(const GcnInst& inst);
|
void S_ADDK_I32(const GcnInst& inst);
|
||||||
void S_MULK_I32(const GcnInst& inst);
|
void S_MULK_I32(const GcnInst& inst);
|
||||||
@ -109,7 +110,7 @@ public:
|
|||||||
void S_MOV_B64(const GcnInst& inst);
|
void S_MOV_B64(const GcnInst& inst);
|
||||||
void S_NOT_B64(const GcnInst& inst);
|
void S_NOT_B64(const GcnInst& inst);
|
||||||
void S_BREV_B32(const GcnInst& inst);
|
void S_BREV_B32(const GcnInst& inst);
|
||||||
void S_BCNT1_I32_B64(const GcnInst& inst);
|
void S_BCNT1_I32_B32(const GcnInst& inst);
|
||||||
void S_FF1_I32_B32(const GcnInst& inst);
|
void S_FF1_I32_B32(const GcnInst& inst);
|
||||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||||
@ -200,6 +201,11 @@ public:
|
|||||||
void V_BFREV_B32(const GcnInst& inst);
|
void V_BFREV_B32(const GcnInst& inst);
|
||||||
void V_FFBH_U32(const GcnInst& inst);
|
void V_FFBH_U32(const GcnInst& inst);
|
||||||
void V_FFBL_B32(const GcnInst& inst);
|
void V_FFBL_B32(const GcnInst& inst);
|
||||||
|
void V_FREXP_EXP_I32_F64(const GcnInst& inst);
|
||||||
|
void V_FREXP_MANT_F64(const GcnInst& inst);
|
||||||
|
void V_FRACT_F64(const GcnInst& inst);
|
||||||
|
void V_FREXP_EXP_I32_F32(const GcnInst& inst);
|
||||||
|
void V_FREXP_MANT_F32(const GcnInst& inst);
|
||||||
void V_MOVRELD_B32(const GcnInst& inst);
|
void V_MOVRELD_B32(const GcnInst& inst);
|
||||||
void V_MOVRELS_B32(const GcnInst& inst);
|
void V_MOVRELS_B32(const GcnInst& inst);
|
||||||
void V_MOVRELSD_B32(const GcnInst& inst);
|
void V_MOVRELSD_B32(const GcnInst& inst);
|
||||||
@ -212,7 +218,7 @@ public:
|
|||||||
|
|
||||||
// VOP3a
|
// VOP3a
|
||||||
void V_MAD_F32(const GcnInst& inst);
|
void V_MAD_F32(const GcnInst& inst);
|
||||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
|
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
|
||||||
void V_MAD_U32_U24(const GcnInst& inst);
|
void V_MAD_U32_U24(const GcnInst& inst);
|
||||||
void V_CUBEID_F32(const GcnInst& inst);
|
void V_CUBEID_F32(const GcnInst& inst);
|
||||||
void V_CUBESC_F32(const GcnInst& inst);
|
void V_CUBESC_F32(const GcnInst& inst);
|
||||||
@ -271,7 +277,7 @@ public:
|
|||||||
// Image Memory
|
// Image Memory
|
||||||
// MIMG
|
// MIMG
|
||||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||||
void IMAGE_STORE(const GcnInst& inst);
|
void IMAGE_STORE(bool has_mip, const GcnInst& inst);
|
||||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||||
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||||
|
@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||||||
return V_FFBH_U32(inst);
|
return V_FFBH_U32(inst);
|
||||||
case Opcode::V_FFBL_B32:
|
case Opcode::V_FFBL_B32:
|
||||||
return V_FFBL_B32(inst);
|
return V_FFBL_B32(inst);
|
||||||
|
case Opcode::V_FREXP_EXP_I32_F64:
|
||||||
|
return V_FREXP_EXP_I32_F64(inst);
|
||||||
|
case Opcode::V_FREXP_MANT_F64:
|
||||||
|
return V_FREXP_MANT_F64(inst);
|
||||||
|
case Opcode::V_FRACT_F64:
|
||||||
|
return V_FRACT_F64(inst);
|
||||||
|
case Opcode::V_FREXP_EXP_I32_F32:
|
||||||
|
return V_FREXP_EXP_I32_F32(inst);
|
||||||
|
case Opcode::V_FREXP_MANT_F32:
|
||||||
|
return V_FREXP_MANT_F32(inst);
|
||||||
case Opcode::V_MOVRELD_B32:
|
case Opcode::V_MOVRELD_B32:
|
||||||
return V_MOVRELD_B32(inst);
|
return V_MOVRELD_B32(inst);
|
||||||
case Opcode::V_MOVRELS_B32:
|
case Opcode::V_MOVRELS_B32:
|
||||||
@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
|
|||||||
|
|
||||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||||
SetDst(inst.dst[0], ir.Fract(src0));
|
SetDst(inst.dst[0], ir.FPFract(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
||||||
@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.FindILsb(src0));
|
SetDst(inst.dst[0], ir.FindILsb(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) {
|
||||||
|
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
|
||||||
|
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_FREXP_MANT_F64(const GcnInst& inst) {
|
||||||
|
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
|
||||||
|
SetDst64(inst.dst[0], ir.FPFrexpSig(src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_FRACT_F64(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc64<IR::F64>(inst.src[0])};
|
||||||
|
SetDst64(inst.dst[0], ir.FPFract(src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||||
|
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_FREXP_MANT_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||||
|
SetDst(inst.dst[0], ir.FPFrexpSig(src0));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_MOVRELD_B32(const GcnInst& inst) {
|
void Translator::V_MOVRELD_B32(const GcnInst& inst) {
|
||||||
const IR::U32 src_val{GetSrc(inst.src[0])};
|
const IR::U32 src_val{GetSrc(inst.src[0])};
|
||||||
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
|
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
|
||||||
@ -1025,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
|
|||||||
|
|
||||||
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
|
IR::U32 src2{GetSrc(inst.src[2])};
|
||||||
|
if (!src1.IsImmediate()) {
|
||||||
|
src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
|
||||||
|
}
|
||||||
|
if (!src2.IsImmediate()) {
|
||||||
|
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
|
||||||
|
}
|
||||||
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
|
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||||||
|
|
||||||
// Buffer store operations
|
// Buffer store operations
|
||||||
case Opcode::IMAGE_STORE:
|
case Opcode::IMAGE_STORE:
|
||||||
return IMAGE_STORE(inst);
|
return IMAGE_STORE(false, inst);
|
||||||
|
case Opcode::IMAGE_STORE_MIP:
|
||||||
|
return IMAGE_STORE(true, inst);
|
||||||
|
|
||||||
// Image misc operations
|
// Image misc operations
|
||||||
case Opcode::IMAGE_GET_RESINFO:
|
case Opcode::IMAGE_GET_RESINFO:
|
||||||
@ -187,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||||
buffer_info.ring_access.Assign(is_ring);
|
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||||
|
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||||
if (is_typed) {
|
if (is_typed) {
|
||||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||||
@ -245,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||||
|
|
||||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
|
||||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||||
"Non immediate offset not supported");
|
"Non immediate offset not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (info.stage == Stage::Hull) {
|
||||||
|
// printf("here\n"); // break
|
||||||
|
}
|
||||||
|
|
||||||
IR::Value address = [&] -> IR::Value {
|
IR::Value address = [&] -> IR::Value {
|
||||||
if (is_ring) {
|
if (is_ring) {
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||||
@ -267,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||||
buffer_info.ring_access.Assign(is_ring);
|
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||||
|
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||||
if (is_typed) {
|
if (is_typed) {
|
||||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||||
@ -423,7 +431,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::IMAGE_STORE(const GcnInst& inst) {
|
void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
|
||||||
const auto& mimg = inst.control.mimg;
|
const auto& mimg = inst.control.mimg;
|
||||||
IR::VectorReg addr_reg{inst.src[0].code};
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
IR::VectorReg data_reg{inst.dst[0].code};
|
IR::VectorReg data_reg{inst.dst[0].code};
|
||||||
@ -434,6 +442,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
|
|||||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||||
|
|
||||||
|
IR::TextureInstInfo info{};
|
||||||
|
info.has_lod.Assign(has_mip);
|
||||||
|
|
||||||
boost::container::static_vector<IR::F32, 4> comps;
|
boost::container::static_vector<IR::F32, 4> comps;
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
if (((mimg.dmask >> i) & 1) == 0) {
|
if (((mimg.dmask >> i) & 1) == 0) {
|
||||||
@ -443,7 +454,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
|
|||||||
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
|
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
|
||||||
}
|
}
|
||||||
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
|
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
|
||||||
ir.ImageWrite(handle, body, value, {});
|
ir.ImageWrite(handle, body, {}, value, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||||
@ -527,6 +538,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
|
|||||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||||
info.has_lod.Assign(flags.any(MimgModifier::Lod));
|
info.has_lod.Assign(flags.any(MimgModifier::Lod));
|
||||||
info.is_array.Assign(mimg.da);
|
info.is_array.Assign(mimg.da);
|
||||||
|
info.is_unnormalized.Assign(mimg.unrm);
|
||||||
|
|
||||||
if (gather) {
|
if (gather) {
|
||||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
#include "shader_recompiler/backend/bindings.h"
|
||||||
#include "shader_recompiler/frontend/copy_shader.h"
|
#include "shader_recompiler/frontend/copy_shader.h"
|
||||||
|
#include "shader_recompiler/frontend/tessellation.h"
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/passes/srt.h"
|
#include "shader_recompiler/ir/passes/srt.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
@ -163,6 +164,7 @@ struct Info {
|
|||||||
UserDataMask ud_mask{};
|
UserDataMask ud_mask{};
|
||||||
|
|
||||||
CopyShaderData gs_copy_data;
|
CopyShaderData gs_copy_data;
|
||||||
|
u32 uses_patches{};
|
||||||
|
|
||||||
BufferResourceList buffers;
|
BufferResourceList buffers;
|
||||||
TextureBufferResourceList texture_buffers;
|
TextureBufferResourceList texture_buffers;
|
||||||
@ -173,8 +175,12 @@ struct Info {
|
|||||||
PersistentSrtInfo srt_info;
|
PersistentSrtInfo srt_info;
|
||||||
std::vector<u32> flattened_ud_buf;
|
std::vector<u32> flattened_ud_buf;
|
||||||
|
|
||||||
|
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
||||||
|
s32 tess_consts_dword_offset = -1;
|
||||||
|
|
||||||
std::span<const u32> user_data;
|
std::span<const u32> user_data;
|
||||||
Stage stage;
|
Stage stage;
|
||||||
|
LogicalStage l_stage;
|
||||||
|
|
||||||
u64 pgm_hash{};
|
u64 pgm_hash{};
|
||||||
VAddr pgm_base;
|
VAddr pgm_base;
|
||||||
@ -190,14 +196,16 @@ struct Info {
|
|||||||
bool uses_shared{};
|
bool uses_shared{};
|
||||||
bool uses_fp16{};
|
bool uses_fp16{};
|
||||||
bool uses_fp64{};
|
bool uses_fp64{};
|
||||||
|
bool stores_tess_level_outer{};
|
||||||
|
bool stores_tess_level_inner{};
|
||||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||||
bool has_readconst{};
|
bool has_readconst{};
|
||||||
u8 mrt_mask{0u};
|
u8 mrt_mask{0u};
|
||||||
bool has_fetch_shader{false};
|
bool has_fetch_shader{false};
|
||||||
u32 fetch_shader_sgpr_base{0u};
|
u32 fetch_shader_sgpr_base{0u};
|
||||||
|
|
||||||
explicit Info(Stage stage_, ShaderParams params)
|
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
|
||||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||||
user_data{params.user_data} {}
|
user_data{params.user_data} {}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -244,6 +252,16 @@ struct Info {
|
|||||||
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
|
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies the tessellation constant buffer into `tess_constants`.
// The buffer descriptor is read from user data via ReadUdReg using tess_consts_ptr_base and
// tess_consts_dword_offset; its base_address is then treated as a directly readable pointer
// and sizeof(tess_constants) bytes are copied from it.
void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
    ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD
    const auto ptr_base = static_cast<u32>(tess_consts_ptr_base);
    const auto dword_offset = static_cast<u32>(tess_consts_dword_offset);
    const auto sharp = ReadUdReg<AmdGpu::Buffer>(ptr_base, dword_offset);
    const VAddr source_addr = sharp.base_address;
    const auto* source = reinterpret_cast<const TessellationDataConstantBuffer*>(source_addr);
    memcpy(&tess_constants, source, sizeof(tess_constants));
}
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
|
|||||||
return "VertexId";
|
return "VertexId";
|
||||||
case Attribute::InstanceId:
|
case Attribute::InstanceId:
|
||||||
return "InstanceId";
|
return "InstanceId";
|
||||||
|
case Attribute::PrimitiveId:
|
||||||
|
return "PrimitiveId";
|
||||||
case Attribute::FragCoord:
|
case Attribute::FragCoord:
|
||||||
return "FragCoord";
|
return "FragCoord";
|
||||||
case Attribute::IsFrontFace:
|
case Attribute::IsFrontFace:
|
||||||
@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) {
|
|||||||
return "LocalInvocationId";
|
return "LocalInvocationId";
|
||||||
case Attribute::LocalInvocationIndex:
|
case Attribute::LocalInvocationIndex:
|
||||||
return "LocalInvocationIndex";
|
return "LocalInvocationIndex";
|
||||||
|
case Attribute::InvocationId:
|
||||||
|
return "InvocationId";
|
||||||
|
case Attribute::PatchVertices:
|
||||||
|
return "PatchVertices";
|
||||||
|
case Attribute::TessellationEvaluationPointU:
|
||||||
|
return "TessellationEvaluationPointU";
|
||||||
|
case Attribute::TessellationEvaluationPointV:
|
||||||
|
return "TessellationEvaluationPointV";
|
||||||
|
case Attribute::PackedHullInvocationInfo:
|
||||||
|
return "PackedHullInvocationInfo";
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -74,6 +74,11 @@ enum class Attribute : u64 {
|
|||||||
FragCoord = 77,
|
FragCoord = 77,
|
||||||
InstanceId0 = 78, // step rate 0
|
InstanceId0 = 78, // step rate 0
|
||||||
InstanceId1 = 79, // step rate 1
|
InstanceId1 = 79, // step rate 1
|
||||||
|
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
|
||||||
|
PatchVertices = 81,
|
||||||
|
TessellationEvaluationPointU = 82,
|
||||||
|
TessellationEvaluationPointV = 83,
|
||||||
|
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
|
||||||
Max,
|
Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
|
|||||||
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
|
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when `attribute` is one of the two tessellation evaluation point attributes
// (TessellationEvaluationPointU or TessellationEvaluationPointV). These are adjacent enum
// values, so the equality test below matches the inclusive range U..V.
constexpr bool IsTessCoord(Attribute attribute) noexcept {
    return attribute == Attribute::TessellationEvaluationPointU ||
           attribute == Attribute::TessellationEvaluationPointV;
}
|
||||||
|
|
||||||
constexpr bool IsParam(Attribute attribute) noexcept {
|
constexpr bool IsParam(Attribute attribute) noexcept {
|
||||||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||||
}
|
}
|
||||||
|
@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
|
|||||||
return fmt::format("{}", arg.VectorReg());
|
return fmt::format("{}", arg.VectorReg());
|
||||||
case Type::Attribute:
|
case Type::Attribute:
|
||||||
return fmt::format("{}", arg.Attribute());
|
return fmt::format("{}", arg.Attribute());
|
||||||
|
case Type::Patch:
|
||||||
|
return fmt::format("{}", arg.Patch());
|
||||||
default:
|
default:
|
||||||
return "<unknown immediate type>";
|
return "<unknown immediate type>";
|
||||||
}
|
}
|
||||||
|
@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) {
|
|||||||
Inst(Opcode::SetM0, value);
|
Inst(Opcode::SetM0, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits a GetAttribute instruction for component `comp` of `attribute` and returns its F32
// result. `comp` is wrapped as a 32-bit immediate; `index` is forwarded unchanged as the
// last operand.
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
    const U32 component{Imm32(comp)};
    return Inst<F32>(Opcode::GetAttribute, attribute, component, index);
}
|
||||||
|
|
||||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||||
@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
|||||||
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a GetTessGenericAttribute instruction with operands (vertex_index, attr_index,
// comp_index) and returns its F32 result.
F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
                                       const U32& comp_index) {
    const F32 loaded{
        Inst<F32>(Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index)};
    return loaded;
}
|
||||||
|
|
||||||
|
void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
|
||||||
|
const U32& comp_index) {
|
||||||
|
Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a GetPatch instruction for `patch` and returns its F32 result.
F32 IREmitter::GetPatch(Patch patch) {
    const F32 loaded{Inst<F32>(Opcode::GetPatch, patch)};
    return loaded;
}
|
||||||
|
|
||||||
|
// Emits a SetPatch instruction with operands (patch, value). The emitted instruction's
// result is not used.
void IREmitter::SetPatch(Patch patch, const F32& value) {
    constexpr Opcode store_op = Opcode::SetPatch;
    Inst(store_op, patch, value);
}
|
||||||
|
|
||||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||||
switch (bit_size) {
|
switch (bit_size) {
|
||||||
case 32:
|
case 32:
|
||||||
@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a composite from a span of 2, 3 or 4 elements by forwarding to the fixed-arity
// CompositeConstruct overloads. Any other element count (including 0 and 1) is unsupported
// and hits UNREACHABLE_MSG.
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    switch (elements.size()) {
    case 2:
        return CompositeConstruct(elements[0], elements[1]);
    case 3:
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    case 4:
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    default:
        // This branch is also reached for fewer than 2 elements, so report the actual count
        // rather than claiming there were more than 4.
        UNREACHABLE_MSG("Composite construct with unsupported element count {} (expected 2-4)",
                        elements.size());
    }
}
|
||||||
|
|
||||||
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
||||||
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
||||||
if (element >= limit) {
|
if (element >= limit) {
|
||||||
@ -692,6 +723,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a floating-point division a / b. Both operands must have the same type: F32 selects
// FPDiv32 and F64 selects FPDiv64. Mismatched operand types hit UNREACHABLE_MSG; any other
// type is reported through ThrowInvalidType.
F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
    const Type operand_type{a.Type()};
    if (operand_type != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
    }
    if (operand_type == Type::F32) {
        return Inst<F32>(Opcode::FPDiv32, a, b);
    }
    if (operand_type == Type::F64) {
        return Inst<F64>(Opcode::FPDiv64, a, b);
    }
    ThrowInvalidType(operand_type);
}
|
||||||
|
|
||||||
F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
|
F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
|
||||||
if (a.Type() != b.Type() || a.Type() != c.Type()) {
|
if (a.Type() != b.Type() || a.Type() != c.Type()) {
|
||||||
UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
|
UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
|
||||||
@ -855,8 +900,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
F32 IREmitter::Fract(const F32& value) {
|
F32F64 IREmitter::FPFract(const F32F64& value) {
|
||||||
return Inst<F32>(Opcode::FPFract, value);
|
switch (value.Type()) {
|
||||||
|
case Type::F32:
|
||||||
|
return Inst<F32>(Opcode::FPFract32, value);
|
||||||
|
case Type::F64:
|
||||||
|
return Inst<F64>(Opcode::FPFract64, value);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(value.Type());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits the frexp-significand operation for `value`: F32 selects FPFrexpSig32 and F64
// selects FPFrexpSig64. The result has the same width as the input. Any other type is
// reported through ThrowInvalidType.
F32F64 IREmitter::FPFrexpSig(const F32F64& value) {
    const Type operand_type{value.Type()};
    if (operand_type == Type::F32) {
        return Inst<F32>(Opcode::FPFrexpSig32, value);
    }
    if (operand_type == Type::F64) {
        return Inst<F64>(Opcode::FPFrexpSig64, value);
    }
    ThrowInvalidType(operand_type);
}
|
||||||
|
|
||||||
|
// Emits the frexp-exponent operation for `value`: F32 selects FPFrexpExp32 and F64 selects
// FPFrexpExp64. The result is a U32 for both input widths. Any other type is reported
// through ThrowInvalidType.
U32 IREmitter::FPFrexpExp(const F32F64& value) {
    const Type operand_type{value.Type()};
    if (operand_type == Type::F32) {
        return Inst<U32>(Opcode::FPFrexpExp32, value);
    }
    if (operand_type == Type::F64) {
        return Inst<U32>(Opcode::FPFrexpExp64, value);
    }
    ThrowInvalidType(operand_type);
}
|
||||||
|
|
||||||
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
|
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
|
||||||
@ -1556,9 +1630,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
|
|||||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits an ImageFetch instruction. Operands are passed in the order
// (handle, coords, lod, offset, multisampling); `info` is attached as the instruction flags.
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod,
                            const Value& offset, const U32& multisampling, TextureInstInfo info) {
    constexpr Opcode fetch_op = Opcode::ImageFetch;
    return Inst(fetch_op, Flags{info}, handle, coords, lod, offset, multisampling);
}
|
||||||
|
|
||||||
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||||
@ -1582,13 +1656,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords,
|
|||||||
offset, lod_clamp);
|
offset, lod_clamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits an ImageRead instruction. Operands are passed in the order (handle, coords, lod);
// `info` is attached as the instruction flags.
Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod,
                           TextureInstInfo info) {
    constexpr Opcode read_op = Opcode::ImageRead;
    return Inst(read_op, Flags{info}, handle, coords, lod);
}
|
||||||
|
|
||||||
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod,
|
||||||
TextureInstInfo info) {
|
const Value& color, TextureInstInfo info) {
|
||||||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
|
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
|
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/condition.h"
|
#include "shader_recompiler/ir/condition.h"
|
||||||
|
#include "shader_recompiler/ir/patch.h"
|
||||||
#include "shader_recompiler/ir/value.h"
|
#include "shader_recompiler/ir/value.h"
|
||||||
|
|
||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
@ -80,10 +81,18 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||||
|
|
||||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
|
||||||
|
IR::Value index = IR::Value(u32(0u)));
|
||||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||||
|
|
||||||
|
[[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
|
||||||
|
const U32& comp_index);
|
||||||
|
void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);
|
||||||
|
|
||||||
|
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||||
|
void SetPatch(Patch patch, const F32& value);
|
||||||
|
|
||||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||||
|
|
||||||
@ -138,6 +147,8 @@ public:
|
|||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
||||||
const Value& e4);
|
const Value& e4);
|
||||||
|
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
|
||||||
|
|
||||||
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
||||||
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
||||||
|
|
||||||
@ -158,6 +169,7 @@ public:
|
|||||||
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
|
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
|
||||||
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
|
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
|
||||||
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
|
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
|
||||||
|
[[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
|
||||||
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
|
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
|
||||||
|
|
||||||
[[nodiscard]] F32F64 FPAbs(const F32F64& value);
|
[[nodiscard]] F32F64 FPAbs(const F32F64& value);
|
||||||
@ -179,7 +191,9 @@ public:
|
|||||||
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
|
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
|
||||||
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
|
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
|
||||||
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
|
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
|
||||||
[[nodiscard]] F32 Fract(const F32& value);
|
[[nodiscard]] F32F64 FPFract(const F32F64& value);
|
||||||
|
[[nodiscard]] F32F64 FPFrexpSig(const F32F64& value);
|
||||||
|
[[nodiscard]] U32 FPFrexpExp(const F32F64& value);
|
||||||
|
|
||||||
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||||
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||||
@ -311,14 +325,16 @@ public:
|
|||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
|
||||||
const Value& offset, const F32& dref, TextureInstInfo info);
|
const Value& offset, const F32& dref, TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod,
|
||||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
const Value& offset, const U32& multisampling,
|
||||||
|
TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||||
const Value& derivatives_dx, const Value& derivatives_dy,
|
const Value& derivatives_dx, const Value& derivatives_dy,
|
||||||
const Value& offset, const F32& lod_clamp,
|
const Value& offset, const F32& lod_clamp,
|
||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
|
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod,
|
||||||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
TextureInstInfo info);
|
||||||
|
void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color,
|
||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
|
|
||||||
void EmitVertex();
|
void EmitVertex();
|
||||||
@ -330,6 +346,7 @@ private:
|
|||||||
template <typename T = Value, typename... Args>
|
template <typename T = Value, typename... Args>
|
||||||
T Inst(Opcode op, Args... args) {
|
T Inst(Opcode op, Args... args) {
|
||||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
|
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
|
||||||
|
it->SetParent(block);
|
||||||
return T{Value{&*it}};
|
return T{Value{&*it}};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -347,6 +364,7 @@ private:
|
|||||||
u32 raw_flags{};
|
u32 raw_flags{};
|
||||||
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
|
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
|
||||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
|
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
|
||||||
|
it->SetParent(block);
|
||||||
return T{Value{&*it}};
|
return T{Value{&*it}};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||||||
case Opcode::Discard:
|
case Opcode::Discard:
|
||||||
case Opcode::DiscardCond:
|
case Opcode::DiscardCond:
|
||||||
case Opcode::SetAttribute:
|
case Opcode::SetAttribute:
|
||||||
|
case Opcode::SetTcsGenericAttribute:
|
||||||
|
case Opcode::SetPatch:
|
||||||
case Opcode::StoreBufferU32:
|
case Opcode::StoreBufferU32:
|
||||||
case Opcode::StoreBufferU32x2:
|
case Opcode::StoreBufferU32x2:
|
||||||
case Opcode::StoreBufferU32x3:
|
case Opcode::StoreBufferU32x3:
|
||||||
|
@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
|
|||||||
constexpr Type ScalarReg{Type::ScalarReg};
|
constexpr Type ScalarReg{Type::ScalarReg};
|
||||||
constexpr Type VectorReg{Type::VectorReg};
|
constexpr Type VectorReg{Type::VectorReg};
|
||||||
constexpr Type Attribute{Type::Attribute};
|
constexpr Type Attribute{Type::Attribute};
|
||||||
constexpr Type SystemValue{Type::SystemValue};
|
constexpr Type Patch{Type::Patch};
|
||||||
constexpr Type U1{Type::U1};
|
constexpr Type U1{Type::U1};
|
||||||
constexpr Type U8{Type::U8};
|
constexpr Type U8{Type::U8};
|
||||||
constexpr Type U16{Type::U16};
|
constexpr Type U16{Type::U16};
|
||||||
|
@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32,
|
|||||||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||||
|
OPCODE(GetPatch, F32, Patch, )
|
||||||
|
OPCODE(SetPatch, Void, Patch, F32, )
|
||||||
|
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
|
||||||
|
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )
|
||||||
|
|
||||||
// Flags
|
// Flags
|
||||||
OPCODE(GetScc, U1, Void, )
|
OPCODE(GetScc, U1, Void, )
|
||||||
@ -184,6 +188,8 @@ OPCODE(FPMin32, F32, F32,
|
|||||||
OPCODE(FPMin64, F64, F64, F64, )
|
OPCODE(FPMin64, F64, F64, F64, )
|
||||||
OPCODE(FPMul32, F32, F32, F32, )
|
OPCODE(FPMul32, F32, F32, F32, )
|
||||||
OPCODE(FPMul64, F64, F64, F64, )
|
OPCODE(FPMul64, F64, F64, F64, )
|
||||||
|
OPCODE(FPDiv32, F32, F32, F32, )
|
||||||
|
OPCODE(FPDiv64, F64, F64, F64, )
|
||||||
OPCODE(FPNeg32, F32, F32, )
|
OPCODE(FPNeg32, F32, F32, )
|
||||||
OPCODE(FPNeg64, F64, F64, )
|
OPCODE(FPNeg64, F64, F64, )
|
||||||
OPCODE(FPRecip32, F32, F32, )
|
OPCODE(FPRecip32, F32, F32, )
|
||||||
@ -208,7 +214,12 @@ OPCODE(FPCeil32, F32, F32,
|
|||||||
OPCODE(FPCeil64, F64, F64, )
|
OPCODE(FPCeil64, F64, F64, )
|
||||||
OPCODE(FPTrunc32, F32, F32, )
|
OPCODE(FPTrunc32, F32, F32, )
|
||||||
OPCODE(FPTrunc64, F64, F64, )
|
OPCODE(FPTrunc64, F64, F64, )
|
||||||
OPCODE(FPFract, F32, F32, )
|
OPCODE(FPFract32, F32, F32, )
|
||||||
|
OPCODE(FPFract64, F64, F64, )
|
||||||
|
OPCODE(FPFrexpSig32, F32, F32, )
|
||||||
|
OPCODE(FPFrexpSig64, F64, F64, )
|
||||||
|
OPCODE(FPFrexpExp32, U32, F32, )
|
||||||
|
OPCODE(FPFrexpExp64, U32, F64, )
|
||||||
|
|
||||||
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
||||||
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
||||||
@ -327,12 +338,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq
|
|||||||
OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
||||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
||||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
||||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, )
|
||||||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||||
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
|
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, )
|
||||||
|
|
||||||
// Image atomic operations
|
// Image atomic operations
|
||||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||||
|
@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void FoldMul(IR::Block& block, IR::Inst& inst) {
|
||||||
|
if (!FoldCommutative<T>(inst, [](T a, T b) { return a * b; })) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const IR::Value rhs{inst.Arg(1)};
|
||||||
|
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
|
||||||
|
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
|
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
|
||||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||||
@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||||||
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
||||||
return;
|
return;
|
||||||
case IR::Opcode::IMul32:
|
case IR::Opcode::IMul32:
|
||||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
FoldMul<u32>(block, inst);
|
||||||
|
return;
|
||||||
|
case IR::Opcode::UDiv32:
|
||||||
|
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||||
|
ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0");
|
||||||
|
return a / b;
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
case IR::Opcode::UMod32:
|
||||||
|
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||||
|
ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0");
|
||||||
|
return a % b;
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
case IR::Opcode::FPCmpClass32:
|
case IR::Opcode::FPCmpClass32:
|
||||||
FoldCmpClass(block, inst);
|
FoldCmpClass(block, inst);
|
||||||
|
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
746
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
746
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
@ -0,0 +1,746 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
|
#include "shader_recompiler/ir/pattern_matching.h"
|
||||||
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tessellation shaders pass outputs to the next shader using LDS.
|
||||||
|
* The Hull shader stage receives input control points stored in LDS.
|
||||||
|
*
|
||||||
|
* These passes attempt to resolve LDS accesses to attribute accesses and correctly
|
||||||
|
* write to the tessellation factor tables.
|
||||||
|
*
|
||||||
|
* The LDS layout is:
|
||||||
|
* - TCS inputs for patch 0
|
||||||
|
* - TCS inputs for patch 1
|
||||||
|
* - TCS inputs for patch 2
|
||||||
|
* - ...
|
||||||
|
* - TCS outputs for patch 0
|
||||||
|
* - TCS outputs for patch 1
|
||||||
|
* - TCS outputs for patch 2
|
||||||
|
* - ...
|
||||||
|
* - PatchConst TCS outputs for patch 0
|
||||||
|
* - PatchConst TCS outputs for patch 1
|
||||||
|
* - PatchConst TCS outputs for patch 2
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* If the Hull stage does not write any new control points the driver will
|
||||||
|
* optimize LDS layout so input and output control point spaces overlap.
|
||||||
|
* (Passthrough)
|
||||||
|
*
|
||||||
|
* The gnm driver requires a V# holding special constants to be bound
|
||||||
|
* for reads by the shader.
|
||||||
|
* The Hull and Domain shaders read values from this buffer which
|
||||||
|
* contain size and offset information required to address input, output,
|
||||||
|
* or PatchConst attributes within the current patch.
|
||||||
|
* See the TessellationDataConstantBuffer struct to see the layout of this V#.
|
||||||
|
*
|
||||||
|
* Tessellation factors are stored to a special tessellation factor V# that is automatically bound
|
||||||
|
* by the driver. This is the input to the fixed function tessellator that actually subdivides the
|
||||||
|
* domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull
|
||||||
|
* shader.
|
||||||
|
* The offset into the tess factor buffer determines which factor the shader is writing.
|
||||||
|
* Additionally, most hull shaders seem to redundantly write tess factors to PatchConst
|
||||||
|
* attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes.
|
||||||
|
*
|
||||||
|
* LDS reads in the Hull shader can be from input control points, and in the Domain shader can
|
||||||
|
* be hs output control points (output from the perspective of the Hull shader) and patchconst
|
||||||
|
* values.
|
||||||
|
* LDS stores in the Hull shader can either be output control point writes or per-patch
|
||||||
|
* (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless it's
|
||||||
|
* followed by the geometry stage (but we haven't seen this yet), so nothing special there.
|
||||||
|
* The address calculations can vary significantly and can't be easily pattern matched. We are at
|
||||||
|
* the mercy of instruction selection the ps4 compiler wanted to use.
|
||||||
|
* Generally though, they could look something like this:
|
||||||
|
* Input control point:
|
||||||
|
* addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride
|
||||||
|
* + attr# * 16 + component
|
||||||
|
* Output control point:
|
||||||
|
* addr = #patches * input_cp_stride * #input_cp_per_patch
|
||||||
|
* + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride
|
||||||
|
* + attr# * 16 + component
|
||||||
|
* Per patch output:
|
||||||
|
* addr = #patches * input_cp_stride * #cp_per_input_patch
|
||||||
|
* + #patches * output_patch_stride
|
||||||
|
* + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component
|
||||||
|
*
|
||||||
|
* output_patch_stride and output_cp_stride are usually compile-time constants in the GCN shader code.
|
||||||
|
*
|
||||||
|
* Hull shaders can probably also read output control points corresponding to other threads, like
|
||||||
|
* shared memory (but we haven't seen this yet).
|
||||||
|
* ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
|
||||||
|
* They should also be able to read PatchConst values,
|
||||||
|
* although not sure if this happens in practice.
|
||||||
|
*
|
||||||
|
* To determine which type of attribute (input, output, patchconst) is accessed, we check the users of
|
||||||
|
* TessConstants V# reads to deduce which type of attribute a given load/store to LDS
|
||||||
|
* is touching.
|
||||||
|
*
|
||||||
|
* In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control
|
||||||
|
* point id (InvocationId) are packed in VGPR1 by the driver like
|
||||||
|
* V1 = InvocationId << 8 | PatchIdInVgt
|
||||||
|
* The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine
|
||||||
|
* which is which.
|
||||||
|
*
|
||||||
|
* This pass does not attempt to deduce the exact attribute referenced in a LDS load/store.
|
||||||
|
* Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's,
|
||||||
|
* TES in's, and PatchConst in/out's.
|
||||||
|
*
|
||||||
|
* TCS/TES Input attributes:
|
||||||
|
* We define input attributes using an array in the shader roughly like this:
|
||||||
|
* // equivalent GLSL in TCS
|
||||||
|
* layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
|
||||||
|
*
|
||||||
|
* Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
|
||||||
|
* We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
|
||||||
|
* For TES, the number of attributes comes from hs_cp_stride / 16.
|
||||||
|
* The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
|
||||||
|
* patch (for Hull) or the hs output patch (for Domain).
|
||||||
|
*
|
||||||
|
* For input reads in TCS or TES, we emit SPIR-V like:
|
||||||
|
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
|
||||||
|
*
|
||||||
|
* For output writes, we assume the control point index is InvocationId, since high level languages
|
||||||
|
* impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
|
||||||
|
* like this:
|
||||||
|
* layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES];
|
||||||
|
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
|
||||||
|
*
|
||||||
|
* NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs
|
||||||
|
* variable.
|
||||||
|
*
|
||||||
|
* Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole
|
||||||
|
* which contains the attributes from many patches. On the other hand, higher level shading
|
||||||
|
* languages restrict attribute access to the patch of the current thread, which is naturally a
|
||||||
|
* restriction in SPIR-V also.
|
||||||
|
* The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many
|
||||||
|
* patches' attributes are just implementation details of the ps4 driver/compiler. To deal with
|
||||||
|
* this, we can replace certain TessConstant V# reads with 0, which only contribute to the base
|
||||||
|
* address of the current patch's attributes in LDS and not the indexes within the local patch.
|
||||||
|
*
|
||||||
|
* (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing
|
||||||
|
* attributes to buffers and not caring about whether they are hs input, hs output, or patchconst
|
||||||
|
* attributes)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using namespace Shader::Optimiation::PatternMatching;
|
||||||
|
|
||||||
|
// Stores the location of the tessellation-constants V# (base pointer register and dword
// offset) on `info`, calls info.ReadTessConstantBuffer(tess_constants), and then feeds
// `tess_constants` to the stage-specific runtime info: hs_info when the logical stage is
// TessellationControl, vs_info for every other stage.
static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
                              Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                              TessellationDataConstantBuffer& tess_constants) {
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);

    const bool is_tess_control = info.l_stage == LogicalStage::TessellationControl;
    if (!is_tess_control) {
        runtime_info.vs_info.InitFromTessConstants(tess_constants);
        return;
    }
    runtime_info.hs_info.InitFromTessConstants(tess_constants);
}
|
||||||
|
|
||||||
|
struct TessSharpLocation {
|
||||||
|
IR::ScalarReg ptr_base;
|
||||||
|
u32 dword_off;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
|
||||||
|
IR::Value sharp_ptr_base;
|
||||||
|
IR::Value sharp_dword_offset;
|
||||||
|
|
||||||
|
IR::Value rv = IR::Value{read_const_buffer};
|
||||||
|
IR::Value handle = read_const_buffer->Arg(0);
|
||||||
|
|
||||||
|
if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(),
|
||||||
|
MatchIgnore(), MatchIgnore())
|
||||||
|
.Match(handle)) {
|
||||||
|
return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
|
||||||
|
.dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
|
||||||
|
} else if (M_COMPOSITECONSTRUCTU32X4(
|
||||||
|
M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)),
|
||||||
|
MatchIgnore()),
|
||||||
|
MatchImm(sharp_dword_offset)),
|
||||||
|
MatchIgnore(), MatchIgnore(), MatchIgnore())
|
||||||
|
.Match(handle)) {
|
||||||
|
return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
|
||||||
|
.dword_off = sharp_dword_offset.U32()};
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Walker that helps deduce what type of attribute a DS instruction is reading
|
||||||
|
// or writing, which could be an input control point, output control point,
|
||||||
|
// or per-patch constant (PatchConst).
|
||||||
|
// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users
|
||||||
|
// recursively and increment a counter on the Load/WriteShared users.
|
||||||
|
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
|
||||||
|
// and PatchConstBase (m_patchConstBase).
|
||||||
|
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
|
||||||
|
// is used as an addend to skip the region for input control points, and similarly
|
||||||
|
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
|
||||||
|
// for output control points.
|
||||||
|
//
|
||||||
|
// TODO: this will break if AMD compiler used distributive property like
|
||||||
|
// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch)
|
||||||
|
class TessConstantUseWalker {
|
||||||
|
public:
|
||||||
|
void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
|
||||||
|
u32 inc;
|
||||||
|
switch (attr) {
|
||||||
|
case TessConstantAttribute::HsNumPatch:
|
||||||
|
case TessConstantAttribute::HsOutputBase:
|
||||||
|
inc = 1;
|
||||||
|
break;
|
||||||
|
case TessConstantAttribute::PatchConstBase:
|
||||||
|
inc = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (IR::Use use : read_const_buffer->Uses()) {
|
||||||
|
MarkTessAttributeUsersHelper(use, inc);
|
||||||
|
}
|
||||||
|
|
||||||
|
++seq_num;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) {
|
||||||
|
IR::Inst* inst = use.user;
|
||||||
|
|
||||||
|
switch (use.user->GetOpcode()) {
|
||||||
|
case IR::Opcode::LoadSharedU32:
|
||||||
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
case IR::Opcode::LoadSharedU128:
|
||||||
|
case IR::Opcode::WriteSharedU32:
|
||||||
|
case IR::Opcode::WriteSharedU64:
|
||||||
|
case IR::Opcode::WriteSharedU128: {
|
||||||
|
u32 counter = inst->Flags<u32>();
|
||||||
|
inst->SetFlags<u32>(counter + inc);
|
||||||
|
// Stop here
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
case IR::Opcode::Phi: {
|
||||||
|
struct PhiCounter {
|
||||||
|
u16 seq_num;
|
||||||
|
u8 unique_edge;
|
||||||
|
u8 counter;
|
||||||
|
};
|
||||||
|
|
||||||
|
PhiCounter count = inst->Flags<PhiCounter>();
|
||||||
|
ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
|
||||||
|
// the point of seq_num is to tell us if we've already traversed this
|
||||||
|
// phi on the current walk. Alternatively we could keep a set of phi's
|
||||||
|
// seen on the current walk. This is to handle phi cycles
|
||||||
|
if (count.seq_num == 0) {
|
||||||
|
// First time we've encountered this phi
|
||||||
|
count.seq_num = seq_num;
|
||||||
|
// Mark the phi as having been traversed originally through this edge
|
||||||
|
count.unique_edge = use.operand;
|
||||||
|
count.counter = inc;
|
||||||
|
} else if (count.seq_num < seq_num) {
|
||||||
|
count.seq_num = seq_num;
|
||||||
|
// For now, assume we are visiting this phi via the same edge
|
||||||
|
// as on other walks. If not, some dataflow analysis might be necessary
|
||||||
|
ASSERT(count.unique_edge == use.operand);
|
||||||
|
count.counter += inc;
|
||||||
|
} else {
|
||||||
|
// count.seq_num == seq_num
|
||||||
|
// there's a cycle, and we've already been here on this walk
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
inst->SetFlags<PhiCounter>(count);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (IR::Use use : inst->Uses()) {
|
||||||
|
MarkTessAttributeUsersHelper(use, inc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 seq_num{1u};
|
||||||
|
};
|
||||||
|
|
||||||
|
// LDS region targeted by a shared-memory access.
// The numeric values are significant: GetAttributeRegionKind converts the counter stored in
// an instruction's flags directly into this enum.
enum class AttributeRegion : u32 {
    InputCP = 0,
    OutputCP = 1,
    PatchConst = 2,
};
|
||||||
|
|
||||||
|
// Maps the counter stored in the flags of `ring_access` to an LDS region.
//   counter == 0                      -> InputCP
//   passthrough hull shader, counter  -> PatchConst (counter must be <= 1)
//   otherwise                         -> AttributeRegion(counter) (counter must be <= 2)
static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info) {
    const u32 marks = ring_access->Flags<u32>();
    if (marks == 0) {
        return AttributeRegion::InputCP;
    }

    const bool is_passthrough_hull = info.l_stage == LogicalStage::TessellationControl &&
                                     runtime_info.hs_info.IsPassthrough();
    if (is_passthrough_hull) {
        ASSERT(marks <= 1);
        return AttributeRegion::PatchConst;
    }

    ASSERT(marks <= 2);
    return static_cast<AttributeRegion>(marks);
}
|
||||||
|
|
||||||
|
// Structural check for whether `term` can be treated as a multiple of `stride`.
// Accepts:
//   - the immediate `stride` itself,
//   - an unsigned or signed bitfield extract with offset 0 and count 24 whose source is
//     itself accepted,
//   - a 32-bit multiply where at least one factor is accepted.
// Anything else is rejected.
static bool IsDivisibleByStride(IR::Value term, u32 stride) {
    if (MatchU32(stride).Match(term)) {
        return true;
    }

    IR::Value x, y;
    const bool is_low24_extract =
        M_BITFIELDUEXTRACT(MatchValue(x), MatchU32(0), MatchU32(24)).Match(term) ||
        M_BITFIELDSEXTRACT(MatchValue(x), MatchU32(0), MatchU32(24)).Match(term);
    if (is_low24_extract) {
        return IsDivisibleByStride(x, stride);
    }

    if (M_IMUL32(MatchValue(x), MatchValue(y)).Match(term)) {
        return IsDivisibleByStride(x, stride) || IsDivisibleByStride(y, stride);
    }
    return false;
}
|
||||||
|
|
||||||
|
// Return true if we can eliminate any addends
|
||||||
|
static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<IR::U32>& addends) {
|
||||||
|
IR::Value a, b;
|
||||||
|
if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) {
|
||||||
|
bool ret = false;
|
||||||
|
ret = TryOptimizeAddendInModulo(a, stride, addends);
|
||||||
|
ret |= TryOptimizeAddendInModulo(b, stride, addends);
|
||||||
|
return ret;
|
||||||
|
} else if (!IsDivisibleByStride(addend, stride)) {
|
||||||
|
addends.push_back(IR::U32{addend});
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In calculation (a + b + ...) % stride
|
||||||
|
// Use this fact
|
||||||
|
// (a + b) mod N = (a mod N + b mod N) mod N
|
||||||
|
// If any addend is divisible by stride, then we can replace it with 0 in the attribute
|
||||||
|
// or component index calculation
|
||||||
|
static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) {
|
||||||
|
std::vector<IR::U32> addends;
|
||||||
|
if (TryOptimizeAddendInModulo(addr, stride, addends)) {
|
||||||
|
addr = ir.Imm32(0);
|
||||||
|
for (auto& addend : addends) {
|
||||||
|
addr = ir.IAdd(addr, addend);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: can optimize div in control point index similarly to mod
|
||||||
|
|
||||||
|
// Read a TCS input (InputCP region) or TES input (OutputCP region)
|
||||||
|
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
|
||||||
|
u32 off_dw) {
|
||||||
|
if (off_dw > 0) {
|
||||||
|
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||||
|
}
|
||||||
|
const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
|
||||||
|
const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
|
||||||
|
const IR::U32 attr_index =
|
||||||
|
ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
|
||||||
|
const IR::U32 comp_index =
|
||||||
|
ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
||||||
|
return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Rewrites the hull shader IR so that memory traffic becomes explicit tessellation operations:
//  - globally coherent StoreBufferU32[xN] -> SetPatch on tessellation factor patches
//  - WriteSharedU32/U64/U128             -> SetTcsGenericAttribute (OutputCP region) or
//                                           SetPatch on a generic patch (PatchConst region)
//  - LoadSharedU32/U64/U128              -> per-dword ReadTessInputComponent reads; only the
//                                           InputCP region is supported (asserted)
// For passthrough hull shaders it additionally emits, two instructions past the Prologue of the
// entry block, a copy of every input attribute component to the matching output attribute.
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    const Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::StoreBufferU32:
            case IR::Opcode::StoreBufferU32x2:
            case IR::Opcode::StoreBufferU32x3:
            case IR::Opcode::StoreBufferU32x4: {
                const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
                if (!buffer_info.globally_coherent) {
                    break;
                }
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};

                // Returns the stored dword as a float. When the dword is itself produced by a
                // BitCastU32F32, the operand of that bitcast is reused instead of casting back.
                const auto GetValue = [&](IR::Value data) -> IR::F32 {
                    if (auto* producer = data.TryInstRecursive();
                        producer && producer->GetOpcode() == IR::Opcode::BitCastU32F32) {
                        return IR::F32{producer->Arg(0)};
                    }
                    return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
                };

                // Dword count derived from the opcode distance to StoreBufferU32
                // (assumes the four StoreBufferU32* opcodes are declared consecutively).
                const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
                IR::U32 index = IR::U32{inst.Arg(1)};
                ASSERT(index.IsImmediate());
                const u32 gcn_factor_idx = (buffer_info.inst_offset.Value() + index.U32()) >> 2;

                const IR::Value data = inst.Arg(2);
                auto get_factor_attr = [&](u32 factor_idx) -> IR::Patch {
                    // The hull outputs tess factors in different formats depending on the shader.
                    // For triangle domains, it seems to pack the entries into 4 consecutive floats,
                    // with the 3 edge factors followed by the 1 interior factor.
                    // For quads, it does 4 edge factors then 2 interior.
                    // There is a tess factor stride member of the GNMX hull constants struct in
                    // a hull program shader binary archive, but this doesn't seem to be
                    // communicated to the driver.
                    // The layout seems to be implied by the type of the abstract domain.
                    switch (runtime_info.hs_info.tess_type) {
                    case AmdGpu::TessellationType::Isoline:
                        ASSERT(factor_idx < 2);
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Triangle:
                        ASSERT(factor_idx < 4);
                        if (factor_idx == 3) {
                            return IR::Patch::TessellationLodInteriorU;
                        }
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Quad:
                        ASSERT(factor_idx < 6);
                        return IR::PatchFactor(factor_idx);
                    default:
                        UNREACHABLE();
                    }
                };

                // The operands were captured above, so the store itself can be dropped now.
                inst.Invalidate();
                if (num_dwords == 1) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
                    break;
                }
                // Multi-dword stores must carry a composite; each element becomes one factor.
                auto* composite = data.TryInstRecursive();
                ASSERT(composite &&
                       (composite->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
                for (u32 i = 0; i < num_dwords; i++) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(composite->Arg(i)));
                }
                break;
            }

            case IR::Opcode::WriteSharedU32:
            case IR::Opcode::WriteSharedU64:
            case IR::Opcode::WriteSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
                const IR::U32 addr{inst.Arg(0)};
                const IR::U32 data{inst.Arg(1).Resolve()};

                // Emits the write of one dword. off_dw is the index of the dword within the
                // original (possibly multi-dword) write.
                const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
                                           u32 off_dw) {
                    const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);

                    if (output_kind == AttributeRegion::OutputCP) {
                        if (off_dw > 0) {
                            // addr is a byte address (the component index below is
                            // (addr & 0xF) >> 2), so the dword offset has to be scaled to bytes.
                            // Adding it unscaled would map every dword of a multi-dword write
                            // onto the same component.
                            addr = ir.IAdd(addr, ir.Imm32(off_dw * 4u));
                        }
                        u32 stride = runtime_info.hs_info.hs_output_cp_stride;
                        // Invocation ID array index is implicit, handled by SPIRV backend
                        const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
                        // 16 bytes per attribute, 4 bytes per component.
                        const IR::U32 attr_index = ir.ShiftRightLogical(
                            ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
                        const IR::U32 comp_index = ir.ShiftRightLogical(
                            ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
                        ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
                    } else {
                        ASSERT(output_kind == AttributeRegion::PatchConst);
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
                    }
                };

                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                if (num_dwords == 1) {
                    SetOutput(addr, data, region, 0);
                } else {
                    for (u32 i = 0; i < num_dwords; i++) {
                        SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
                    }
                }
                inst.Invalidate();
                break;
            }

            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                ASSERT_MSG(region == AttributeRegion::InputCP,
                           "Unhandled read of output or patchconst attribute in hull shader");
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(
                        ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
                } else {
                    // Read each dword separately and rebuild the composite the load produced.
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component =
                            ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }

            default:
                break;
            }
        }
    }

    if (runtime_info.hs_info.IsPassthrough()) {
        // Copy input attributes to output attributes, indexed by InvocationID
        // Passthrough should imply that input and output patches have same number of vertices
        IR::Block* entry_block = *program.blocks.begin();
        auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
            return inst.GetOpcode() == IR::Opcode::Prologue;
        });
        ASSERT(it != entry_block->end());
        ++it;
        ASSERT(it != entry_block->end());
        ++it;
        // Prologue
        // SetExec #true
        // <- insert here
        // ...
        IR::IREmitter ir{*entry_block, it};

        ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
        u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
        const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
        for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) {
            for (u32 comp = 0; comp < 4; comp++) {
                IR::F32 attr_read =
                    ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp));
                // InvocationId is implicit index for output control point writes
                ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp));
            }
        }
        // We could wrap the rest of the program in an if stmt
        // CopyInputAttrsToOutputs(); // pseudocode
        // if (InvocationId == 0) {
        //     PatchConstFunction();
        // }
        // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally
    }
}
|
||||||
|
|
||||||
|
// Rewrites LoadSharedU32/U64/U128 in the domain shader IR into explicit attribute reads:
// loads classified as OutputCP become per-dword ReadTessInputComponent reads, loads classified
// as PatchConst become GetPatch reads of generic patches. Any other region trips an assert.
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);

                // Emits the read of one dword; off_dw is its index within the original load.
                const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessInputComponent(
                            addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
                    } else {
                        ASSERT(region == AttributeRegion::PatchConst);
                        // The patch index is computed at compile time from addr.U32(), which is
                        // only meaningful for an immediate. The hull pass asserts the same
                        // precondition for its patch constant writes.
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
                    }
                };

                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
                } else {
                    // Read each dword separately and rebuild the composite the load produced.
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component = GetInput(addr, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }
            default:
                break;
            }
        }
    }
}
|
||||||
|
|
||||||
|
// Run before either hull or domain transform
|
||||||
|
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||||
|
TessellationDataConstantBuffer tess_constants;
|
||||||
|
Shader::Info& info = program.info;
|
||||||
|
// Find the TessellationDataConstantBuffer V#
|
||||||
|
for (IR::Block* block : program.blocks) {
|
||||||
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
|
auto found_tess_consts_sharp = [&]() -> bool {
|
||||||
|
switch (inst.GetOpcode()) {
|
||||||
|
case IR::Opcode::LoadSharedU32:
|
||||||
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
case IR::Opcode::LoadSharedU128:
|
||||||
|
case IR::Opcode::WriteSharedU32:
|
||||||
|
case IR::Opcode::WriteSharedU64:
|
||||||
|
case IR::Opcode::WriteSharedU128: {
|
||||||
|
IR::Value addr = inst.Arg(0);
|
||||||
|
auto read_const_buffer = IR::BreadthFirstSearch(
|
||||||
|
addr, [](IR::Inst* maybe_tess_const) -> std::optional<IR::Inst*> {
|
||||||
|
if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||||
|
return maybe_tess_const;
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
});
|
||||||
|
if (read_const_buffer) {
|
||||||
|
auto sharp_location = FindTessConstantSharp(read_const_buffer.value());
|
||||||
|
if (sharp_location) {
|
||||||
|
if (info.tess_consts_dword_offset >= 0) {
|
||||||
|
// Its possible theres a readconstbuffer that contributes to an
|
||||||
|
// LDS address and isnt a TessConstant V# read. Could improve on
|
||||||
|
// this somehow
|
||||||
|
ASSERT_MSG(static_cast<s32>(sharp_location->dword_off) ==
|
||||||
|
info.tess_consts_dword_offset &&
|
||||||
|
sharp_location->ptr_base ==
|
||||||
|
info.tess_consts_ptr_base,
|
||||||
|
"TessConstants V# is ambiguous");
|
||||||
|
}
|
||||||
|
InitTessConstants(sharp_location->ptr_base,
|
||||||
|
static_cast<s32>(sharp_location->dword_off), info,
|
||||||
|
runtime_info, tess_constants);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
UNREACHABLE_MSG("Failed to match tess constant sharp");
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
|
if (found_tess_consts_sharp) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT(info.tess_consts_dword_offset >= 0);
|
||||||
|
|
||||||
|
TessConstantUseWalker walker;
|
||||||
|
|
||||||
|
for (IR::Block* block : program.blocks) {
|
||||||
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||||
|
auto sharp_location = FindTessConstantSharp(&inst);
|
||||||
|
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
|
||||||
|
sharp_location->dword_off == info.tess_consts_dword_offset) {
|
||||||
|
// The shader is reading from the TessConstants V#
|
||||||
|
IR::Value index = inst.Arg(1);
|
||||||
|
|
||||||
|
ASSERT_MSG(index.IsImmediate(),
|
||||||
|
"Tessellation constant read with dynamic index");
|
||||||
|
u32 off_dw = index.U32();
|
||||||
|
ASSERT(off_dw <=
|
||||||
|
static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));
|
||||||
|
|
||||||
|
auto tess_const_attr = static_cast<TessConstantAttribute>(off_dw);
|
||||||
|
switch (tess_const_attr) {
|
||||||
|
case TessConstantAttribute::LsStride:
|
||||||
|
// If not, we may need to make this runtime state for TES
|
||||||
|
ASSERT(info.l_stage == LogicalStage::TessellationControl);
|
||||||
|
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride));
|
||||||
|
break;
|
||||||
|
case TessConstantAttribute::HsCpStride:
|
||||||
|
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride));
|
||||||
|
break;
|
||||||
|
case TessConstantAttribute::HsNumPatch:
|
||||||
|
case TessConstantAttribute::HsOutputBase:
|
||||||
|
case TessConstantAttribute::PatchConstBase:
|
||||||
|
walker.MarkTessAttributeUsers(&inst, tess_const_attr);
|
||||||
|
// We should be able to safely set these to 0 so that indexing happens only
|
||||||
|
// within the local patch in the recompiled Vulkan shader. This assumes
|
||||||
|
// these values only contribute to address calculations for in/out
|
||||||
|
// attributes in the original gcn shader.
|
||||||
|
// See the explanation for why we set V2 to 0 when emitting the prologue.
|
||||||
|
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||||
|
break;
|
||||||
|
case Shader::TessConstantAttribute::PatchConstSize:
|
||||||
|
case Shader::TessConstantAttribute::PatchOutputSize:
|
||||||
|
case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold:
|
||||||
|
case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex:
|
||||||
|
// May need to replace PatchConstSize and PatchOutputSize with 0
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Read past end of TessConstantsBuffer");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// These pattern matching are neccessary for now unless we support dynamic indexing of
|
||||||
|
// PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single
|
||||||
|
// vec4 array like in/out attrs. Not sure about tess factors.
|
||||||
|
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||||
|
// Replace the BFEs on V1 (packed with patch id within VGT and output cp id)
|
||||||
|
for (IR::Block* block : program.blocks) {
|
||||||
|
for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) {
|
||||||
|
IR::Inst& inst = *it;
|
||||||
|
if (M_BITFIELDUEXTRACT(
|
||||||
|
M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
|
||||||
|
MatchIgnore()),
|
||||||
|
MatchU32(0), MatchU32(8))
|
||||||
|
.Match(IR::Value{&inst})) {
|
||||||
|
IR::IREmitter emit(*block, it);
|
||||||
|
// This is the patch id within the VGT, not the actual PrimitiveId
|
||||||
|
// in the draw
|
||||||
|
IR::Value replacement(0u);
|
||||||
|
inst.ReplaceUsesWithAndRemove(replacement);
|
||||||
|
} else if (M_BITFIELDUEXTRACT(
|
||||||
|
M_GETATTRIBUTEU32(
|
||||||
|
MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
|
||||||
|
MatchIgnore()),
|
||||||
|
MatchU32(8), MatchU32(5))
|
||||||
|
.Match(IR::Value{&inst})) {
|
||||||
|
IR::IREmitter ir(*block, it);
|
||||||
|
IR::Value replacement;
|
||||||
|
if (runtime_info.hs_info.IsPassthrough()) {
|
||||||
|
// Deal with annoying pattern in BB where InvocationID use makes no
|
||||||
|
// sense (in addr calculation for patchconst or tess factor write)
|
||||||
|
replacement = ir.Imm32(0);
|
||||||
|
} else {
|
||||||
|
replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
|
||||||
|
}
|
||||||
|
inst.ReplaceUsesWithAndRemove(replacement);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader::Optimization
|
@ -6,6 +6,10 @@
|
|||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
|
||||||
|
namespace Shader {
|
||||||
|
struct Profile;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
void SsaRewritePass(IR::BlockList& program);
|
void SsaRewritePass(IR::BlockList& program);
|
||||||
@ -18,5 +22,9 @@ void CollectShaderInfoPass(IR::Program& program);
|
|||||||
void LowerSharedMemToRegisters(IR::Program& program);
|
void LowerSharedMemToRegisters(IR::Program& program);
|
||||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||||
Stage stage);
|
Stage stage);
|
||||||
|
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||||
|
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||||
|
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||||
|
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile);
|
||||||
|
|
||||||
} // namespace Shader::Optimization
|
} // namespace Shader::Optimization
|
||||||
|
@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a 4-component vector from `texel`, picking each output channel according to the
// swizzle that `sharp` reports for it. `sharp` may be any type exposing GetSwizzle(u32).
IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
    // Resolves one output channel to a constant or to a component extracted from the texel.
    const auto select_channel = [&](u32 channel) -> IR::Value {
        switch (sharp.GetSwizzle(channel)) {
        case AmdGpu::CompSwizzle::Zero:
            return ir.Imm32(0.f);
        case AmdGpu::CompSwizzle::One:
            return ir.Imm32(1.f);
        case AmdGpu::CompSwizzle::Red:
            return ir.CompositeExtract(texel, 0);
        case AmdGpu::CompSwizzle::Green:
            return ir.CompositeExtract(texel, 1);
        case AmdGpu::CompSwizzle::Blue:
            return ir.CompositeExtract(texel, 2);
        case AmdGpu::CompSwizzle::Alpha:
            return ir.CompositeExtract(texel, 3);
        default:
            UNREACHABLE();
        }
    };

    // Channels are resolved one statement at a time so that any emitted instructions keep the
    // 0..3 order regardless of argument evaluation order.
    boost::container::static_vector<IR::Value, 4> channels;
    for (u32 channel = 0; channel < 4; ++channel) {
        channels.push_back(select_channel(channel));
    }
    return ir.CompositeConstruct(channels[0], channels[1], channels[2], channels[3]);
}
|
||||||
|
|
||||||
class Descriptors {
|
class Descriptors {
|
||||||
public:
|
public:
|
||||||
explicit Descriptors(Info& info_)
|
explicit Descriptors(Info& info_)
|
||||||
@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
inst.SetArg(0, ir.Imm32(binding));
|
inst.SetArg(0, ir.Imm32(binding));
|
||||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||||
|
|
||||||
|
// Apply dst_sel swizzle on formatted buffer instructions
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||||
|
inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
|
||||||
|
} else {
|
||||||
|
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||||
|
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||||
|
inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||||
@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||||||
Descriptors& descriptors, const IR::Inst* producer,
|
Descriptors& descriptors, const IR::Inst* producer,
|
||||||
const u32 image_binding, const AmdGpu::Image& image) {
|
const u32 image_binding, const AmdGpu::Image& image) {
|
||||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||||
const u32 sampler_binding = [&] {
|
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||||
const IR::Value& handle = producer->Arg(1);
|
const IR::Value& handle = producer->Arg(1);
|
||||||
// Inline sampler resource.
|
// Inline sampler resource.
|
||||||
if (handle.IsImmediate()) {
|
if (handle.IsImmediate()) {
|
||||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||||
return descriptors.Add(SamplerResource{
|
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||||
|
const auto binding = descriptors.Add(SamplerResource{
|
||||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||||
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
|
.inline_sampler = inline_sampler,
|
||||||
});
|
});
|
||||||
|
return {binding, inline_sampler};
|
||||||
}
|
}
|
||||||
// Normal sampler resource.
|
// Normal sampler resource.
|
||||||
const auto ssharp_handle = handle.InstRecursive();
|
const auto ssharp_handle = handle.InstRecursive();
|
||||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||||
return descriptors.Add(SamplerResource{
|
const auto binding = descriptors.Add(SamplerResource{
|
||||||
.sharp_idx = ssharp,
|
.sharp_idx = ssharp,
|
||||||
.associated_image = image_binding,
|
.associated_image = image_binding,
|
||||||
.disable_aniso = disable_aniso,
|
.disable_aniso = disable_aniso,
|
||||||
});
|
});
|
||||||
|
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||||
}();
|
}();
|
||||||
|
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
@ -539,28 +580,47 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
|
const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
|
||||||
|
// Query dimensions of image if needed for normalization.
|
||||||
|
// We can't use the image sharp because it could be bound to a different image later.
|
||||||
|
const auto dimensions =
|
||||||
|
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||||
|
: IR::Value{};
|
||||||
|
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||||
|
const auto coord = get_addr_reg(coord_idx);
|
||||||
|
if (unnormalized) {
|
||||||
|
// Normalize the coordinate for sampling, dividing by its corresponding dimension.
|
||||||
|
const auto dim =
|
||||||
|
ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)});
|
||||||
|
return ir.FPDiv(coord, dim);
|
||||||
|
}
|
||||||
|
return coord;
|
||||||
|
};
|
||||||
|
|
||||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||||
const IR::Value coords = [&] -> IR::Value {
|
const IR::Value coords = [&] -> IR::Value {
|
||||||
switch (image.GetType()) {
|
switch (image.GetType()) {
|
||||||
case AmdGpu::ImageType::Color1D: // x
|
case AmdGpu::ImageType::Color1D: // x
|
||||||
addr_reg = addr_reg + 1;
|
addr_reg = addr_reg + 1;
|
||||||
return get_addr_reg(addr_reg - 1);
|
return get_coord(addr_reg - 1, 0);
|
||||||
case AmdGpu::ImageType::Color1DArray: // x, slice
|
case AmdGpu::ImageType::Color1DArray: // x, slice
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case AmdGpu::ImageType::Color2D: // x, y
|
case AmdGpu::ImageType::Color2D: // x, y
|
||||||
addr_reg = addr_reg + 2;
|
addr_reg = addr_reg + 2;
|
||||||
return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
|
return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
|
||||||
case AmdGpu::ImageType::Color2DArray: // x, y, slice
|
case AmdGpu::ImageType::Color2DArray: // x, y, slice
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
|
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
|
||||||
[[fallthrough]];
|
addr_reg = addr_reg + 3;
|
||||||
|
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
|
||||||
|
get_addr_reg(addr_reg - 1));
|
||||||
case AmdGpu::ImageType::Color3D: // x, y, z
|
case AmdGpu::ImageType::Color3D: // x, y, z
|
||||||
addr_reg = addr_reg + 3;
|
addr_reg = addr_reg + 3;
|
||||||
return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
|
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
|
||||||
get_addr_reg(addr_reg - 1));
|
get_coord(addr_reg - 1, 2));
|
||||||
case AmdGpu::ImageType::Cube: // x, y, face
|
case AmdGpu::ImageType::Cube: // x, y, face
|
||||||
addr_reg = addr_reg + 3;
|
addr_reg = addr_reg + 3;
|
||||||
return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
|
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
|
||||||
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
|
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
@ -711,11 +771,17 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||||||
}();
|
}();
|
||||||
inst.SetArg(1, coords);
|
inst.SetArg(1, coords);
|
||||||
|
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||||
|
inst.SetArg(3, SwizzleVector(ir, image, inst.Arg(3)));
|
||||||
|
}
|
||||||
|
|
||||||
if (inst_info.has_lod) {
|
if (inst_info.has_lod) {
|
||||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
|
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
||||||
|
inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||||
|
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||||
inst.SetArg(3, arg);
|
inst.SetArg(2, arg);
|
||||||
} else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
} else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
|
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
|
||||||
inst.SetArg(4, arg);
|
inst.SetArg(4, arg);
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/recompiler.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
};
|
};
|
||||||
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
|
case Stage::Local: {
|
||||||
|
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
|
const auto opcode = inst.GetOpcode();
|
||||||
|
switch (opcode) {
|
||||||
|
case IR::Opcode::WriteSharedU64:
|
||||||
|
case IR::Opcode::WriteSharedU32: {
|
||||||
|
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
||||||
|
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||||
|
|
||||||
|
u32 offset = 0;
|
||||||
|
const auto* addr = inst.Arg(0).InstRecursive();
|
||||||
|
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
||||||
|
ASSERT(addr->Arg(1).IsImmediate());
|
||||||
|
offset = addr->Arg(1).U32();
|
||||||
|
}
|
||||||
|
IR::Value data = inst.Arg(1).Resolve();
|
||||||
|
for (s32 i = 0; i < num_components; i++) {
|
||||||
|
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||||
|
const auto comp = (offset / 4) % 4;
|
||||||
|
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
|
||||||
|
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||||
|
offset += 4;
|
||||||
|
}
|
||||||
|
inst.Invalidate();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Stage::Export: {
|
case Stage::Export: {
|
||||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const auto opcode = inst.GetOpcode();
|
const auto opcode = inst.GetOpcode();
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case IR::Opcode::StoreBufferU32: {
|
case IR::Opcode::StoreBufferU32: {
|
||||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||||
|
if (!info.system_coherent || !info.globally_coherent) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
const auto opcode = inst.GetOpcode();
|
const auto opcode = inst.GetOpcode();
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case IR::Opcode::LoadBufferU32: {
|
case IR::Opcode::LoadBufferU32: {
|
||||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||||
|
if (!info.system_coherent || !info.globally_coherent) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
|
||||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||||
const auto bucket = offset.Resolve().U32() / 256u;
|
const auto bucket = offset.Resolve().U32() / 256u;
|
||||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||||
@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case IR::Opcode::StoreBufferU32: {
|
case IR::Opcode::StoreBufferU32: {
|
||||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
|
||||||
|
if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||||||
case IR::Opcode::GetUserData:
|
case IR::Opcode::GetUserData:
|
||||||
info.ud_mask.Set(inst.Arg(0).ScalarReg());
|
info.ud_mask.Set(inst.Arg(0).ScalarReg());
|
||||||
break;
|
break;
|
||||||
|
case IR::Opcode::SetPatch: {
|
||||||
|
const auto patch = inst.Arg(0).Patch();
|
||||||
|
if (patch <= IR::Patch::TessellationLodBottom) {
|
||||||
|
info.stores_tess_level_outer = true;
|
||||||
|
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
|
||||||
|
info.stores_tess_level_inner = true;
|
||||||
|
} else {
|
||||||
|
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case IR::Opcode::GetPatch: {
|
||||||
|
const auto patch = inst.Arg(0).Patch();
|
||||||
|
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case IR::Opcode::LoadSharedU32:
|
case IR::Opcode::LoadSharedU32:
|
||||||
case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
case IR::Opcode::WriteSharedU32:
|
case IR::Opcode::WriteSharedU32:
|
||||||
|
@ -0,0 +1,47 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
#include "shader_recompiler/profile.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
|
/// Emits a barrier after outermost `if` constructs whose condition is derived from the
/// local invocation id.
///
/// The pass is a no-op unless the program uses shared memory and the profile sets
/// `needs_lds_barriers`. Only `If` nodes encountered at nesting depth zero are inspected;
/// for each one whose condition reaches a `GetAttributeU32(LocalInvocationId)` instruction,
/// a barrier is inserted in the merge block ahead of the first non-phi instruction.
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
    const bool pass_enabled = program.info.uses_shared && profile.needs_lds_barriers;
    if (!pass_enabled) {
        return;
    }

    using NodeType = IR::AbstractSyntaxNode::Type;
    u32 nesting{};
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        if (node.type != NodeType::If) {
            // Leaving an `if` lowers the nesting level; every other node kind is ignored.
            if (node.type == NodeType::EndIf) {
                --nesting;
            }
            continue;
        }

        // Always count the `if` we are entering, but only examine outermost ones.
        if (nesting++ != 0) {
            continue;
        }

        const IR::U1 condition = node.data.if_node.cond;
        const auto found =
            IR::BreadthFirstSearch(condition, [](IR::Inst* inst) -> std::optional<bool> {
                const bool reads_local_id =
                    inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
                    inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId;
                if (reads_local_id) {
                    return true;
                }
                return std::nullopt;
            });
        if (!found) {
            continue;
        }

        // Place the barrier after any phi instructions at the top of the merge block.
        IR::Block* const merge_block = node.data.if_node.merge;
        auto first_non_phi = std::ranges::find_if_not(merge_block->Instructions(), IR::IsPhi);
        IR::IREmitter emitter{*merge_block, first_non_phi};
        emitter.Barrier();
    }
}
|
||||||
|
|
||||||
|
} // namespace Shader::Optimization
|
28
src/shader_recompiler/ir/patch.cpp
Normal file
28
src/shader_recompiler/ir/patch.cpp
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "shader_recompiler/ir/patch.h"
|
||||||
|
|
||||||
|
namespace Shader::IR {
|
||||||
|
|
||||||
|
/// Returns a printable name for `patch`.
///
/// The six tessellation level factors map to their enumerator names; every other value is
/// rendered as "Component<N>", where N is the offset from Patch::Component0.
std::string NameOf(Patch patch) {
    switch (patch) {
    case Patch::TessellationLodLeft:
        return "TessellationLodLeft";
    case Patch::TessellationLodTop:
        return "TessellationLodTop";
    case Patch::TessellationLodRight:
        return "TessellationLodRight";
    case Patch::TessellationLodBottom:
        return "TessellationLodBottom";
    case Patch::TessellationLodInteriorU:
        return "TessellationLodInteriorU";
    case Patch::TessellationLodInteriorV:
        return "TessellationLodInteriorV";
    default: {
        const u32 component_index = static_cast<u32>(patch) - static_cast<u32>(Patch::Component0);
        return fmt::format("Component{}", component_index);
    }
    }
}
|
||||||
|
|
||||||
|
} // namespace Shader::IR
|
173
src/shader_recompiler/ir/patch.h
Normal file
173
src/shader_recompiler/ir/patch.h
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Shader::IR {
|
||||||
|
|
||||||
|
/// Identifies a per-patch tessellation value: six tessellation level factors followed by
/// 120 generic components.
enum class Patch : u64 {
    // Tessellation level factors.
    TessellationLodLeft = 0,
    TessellationLodTop,
    TessellationLodRight,
    TessellationLodBottom,
    TessellationLodInteriorU,
    TessellationLodInteriorV,

    // Generic components, laid out four per line: GenericPatchIndex() divides the component
    // offset by 4 and GenericPatchElement() takes it modulo 4.
    Component0, Component1, Component2, Component3,
    Component4, Component5, Component6, Component7,
    Component8, Component9, Component10, Component11,
    Component12, Component13, Component14, Component15,
    Component16, Component17, Component18, Component19,
    Component20, Component21, Component22, Component23,
    Component24, Component25, Component26, Component27,
    Component28, Component29, Component30, Component31,
    Component32, Component33, Component34, Component35,
    Component36, Component37, Component38, Component39,
    Component40, Component41, Component42, Component43,
    Component44, Component45, Component46, Component47,
    Component48, Component49, Component50, Component51,
    Component52, Component53, Component54, Component55,
    Component56, Component57, Component58, Component59,
    Component60, Component61, Component62, Component63,
    Component64, Component65, Component66, Component67,
    Component68, Component69, Component70, Component71,
    Component72, Component73, Component74, Component75,
    Component76, Component77, Component78, Component79,
    Component80, Component81, Component82, Component83,
    Component84, Component85, Component86, Component87,
    Component88, Component89, Component90, Component91,
    Component92, Component93, Component94, Component95,
    Component96, Component97, Component98, Component99,
    Component100, Component101, Component102, Component103,
    Component104, Component105, Component106, Component107,
    Component108, Component109, Component110, Component111,
    Component112, Component113, Component114, Component115,
    Component116, Component117, Component118, Component119,
};
|
||||||
|
static_assert(static_cast<u64>(Patch::Component119) == 125);
|
||||||
|
|
||||||
|
/// Returns true when `patch` lies in the generic range [Component0, Component119].
constexpr bool IsGeneric(Patch patch) noexcept {
    if (patch < Patch::Component0) {
        return false;
    }
    return patch <= Patch::Component119;
}
|
||||||
|
|
||||||
|
/// Reinterprets `index` directly as a Patch value; no offset is applied.
constexpr Patch PatchFactor(u32 index) {
    const u64 raw_value{index};
    return static_cast<Patch>(raw_value);
}
|
||||||
|
|
||||||
|
/// Returns the Patch value located `index` entries after Patch::Component0.
constexpr Patch PatchGeneric(u32 index) {
    constexpr u32 first_generic = static_cast<u32>(Patch::Component0);
    return static_cast<Patch>(first_generic + index);
}
|
||||||
|
|
||||||
|
/// Returns the offset of `patch` from Patch::Component0, divided by four.
constexpr u32 GenericPatchIndex(Patch patch) {
    const u32 component_offset = static_cast<u32>(patch) - static_cast<u32>(Patch::Component0);
    return component_offset / 4;
}
|
||||||
|
|
||||||
|
/// Returns the offset of `patch` from Patch::Component0, modulo four.
constexpr u32 GenericPatchElement(Patch patch) {
    const u32 component_offset = static_cast<u32>(patch) - static_cast<u32>(Patch::Component0);
    return component_offset % 4;
}
|
||||||
|
|
||||||
|
[[nodiscard]] std::string NameOf(Patch patch);
|
||||||
|
|
||||||
|
} // namespace Shader::IR
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<Shader::IR::Patch> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(const Shader::IR::Patch patch, format_context& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
|
||||||
|
}
|
||||||
|
};
|
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
|
#include "shader_recompiler/ir/value.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimiation::PatternMatching {
|
||||||
|
|
||||||
|
// Attempt at pattern matching for Insts and Values
|
||||||
|
// Needs improvement, mostly a convenience
|
||||||
|
|
||||||
|
template <typename Derived>
|
||||||
|
struct MatchObject {
|
||||||
|
inline bool Match(IR::Value v) {
|
||||||
|
return static_cast<Derived*>(this)->Match(v);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Matcher that accepts any value and stores it in the referenced output.
struct MatchValue : MatchObject<MatchValue> {
    MatchValue(IR::Value& out) : captured(out) {}

    /// Always succeeds; writes `v` through the reference given at construction.
    bool Match(IR::Value v) {
        captured = v;
        return true;
    }

private:
    IR::Value& captured;
};
|
||||||
|
|
||||||
|
/// Matcher that accepts any value and discards it.
struct MatchIgnore : MatchObject<MatchIgnore> {
    MatchIgnore() {}

    /// Always succeeds; the value is not inspected.
    bool Match(IR::Value) {
        return true;
    }
};
|
||||||
|
|
||||||
|
/// Matcher that accepts only immediate values and stores the accepted value in the
/// referenced output.
struct MatchImm : MatchObject<MatchImm> {
    MatchImm(IR::Value& v) : captured(v) {}

    /// Succeeds when `v` is an immediate; the output is written only on success.
    bool Match(IR::Value v) {
        const bool is_immediate = v.IsImmediate();
        if (is_immediate) {
            captured = v;
        }
        return is_immediate;
    }

private:
    IR::Value& captured;
};
|
||||||
|
|
||||||
|
/// Matcher that accepts a value only when it is the given attribute.
struct MatchAttribute : MatchObject<MatchAttribute> {
    MatchAttribute(IR::Attribute attribute_) : expected(attribute_) {}

    /// Succeeds when `v` has type Attribute and equals the expected attribute.
    bool Match(IR::Value v) {
        if (v.Type() != IR::Type::Attribute) {
            return false;
        }
        return v.Attribute() == expected;
    }

private:
    IR::Attribute expected;
};
|
||||||
|
|
||||||
|
/// Matcher that accepts a value only when it is a U32 immediate equal to a given constant.
struct MatchU32 : MatchObject<MatchU32> {
    MatchU32(u32 imm_) : expected(imm_) {}

    /// Succeeds when `v` is an immediate of type U32 whose value equals the constant.
    bool Match(IR::Value v) {
        if (!v.IsImmediate()) {
            return false;
        }
        if (v.Type() != IR::Type::U32) {
            return false;
        }
        return v.U32() == expected;
    }

private:
    u32 expected;
};
|
||||||
|
|
||||||
|
template <IR::Opcode opcode, typename... Args>
|
||||||
|
struct MatchInstObject : MatchObject<MatchInstObject<opcode>> {
|
||||||
|
static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));
|
||||||
|
MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}
|
||||||
|
|
||||||
|
inline bool Match(IR::Value v) {
|
||||||
|
IR::Inst* inst = v.TryInstRecursive();
|
||||||
|
if (!inst || inst->GetOpcode() != opcode) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matched = true;
|
||||||
|
|
||||||
|
[&]<std::size_t... Is>(std::index_sequence<Is...>) {
|
||||||
|
((matched = matched && std::get<Is>(pattern).Match(inst->Arg(Is))), ...);
|
||||||
|
}(std::make_index_sequence<sizeof...(Args)>{});
|
||||||
|
|
||||||
|
return matched;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
using MatchArgs = std::tuple<Args&...>;
|
||||||
|
MatchArgs pattern;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <IR::Opcode opcode, typename... Args>
|
||||||
|
inline auto MakeInstPattern(Args&&... args) {
|
||||||
|
return MatchInstObject<opcode, Args...>(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience macros. TODO: there is probably a simpler way of doing this
|
||||||
|
#define M_READCONST(...) MakeInstPattern<IR::Opcode::ReadConst>(__VA_ARGS__)
|
||||||
|
#define M_GETUSERDATA(...) MakeInstPattern<IR::Opcode::GetUserData>(__VA_ARGS__)
|
||||||
|
#define M_BITFIELDUEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldUExtract>(__VA_ARGS__)
|
||||||
|
#define M_BITFIELDSEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldSExtract>(__VA_ARGS__)
|
||||||
|
#define M_GETATTRIBUTEU32(...) MakeInstPattern<IR::Opcode::GetAttributeU32>(__VA_ARGS__)
|
||||||
|
#define M_UMOD32(...) MakeInstPattern<IR::Opcode::UMod32>(__VA_ARGS__)
|
||||||
|
#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern<IR::Opcode::ShiftRightLogical32>(__VA_ARGS__)
|
||||||
|
#define M_IADD32(...) MakeInstPattern<IR::Opcode::IAdd32>(__VA_ARGS__)
|
||||||
|
#define M_IMUL32(...) MakeInstPattern<IR::Opcode::IMul32>(__VA_ARGS__)
|
||||||
|
#define M_BITWISEAND32(...) MakeInstPattern<IR::Opcode::BitwiseAnd32>(__VA_ARGS__)
|
||||||
|
#define M_GETTESSGENERICATTRIBUTE(...) \
|
||||||
|
MakeInstPattern<IR::Opcode::GetTessGenericAttribute>(__VA_ARGS__)
|
||||||
|
#define M_SETTCSGENERICATTRIBUTE(...) \
|
||||||
|
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
|
||||||
|
#define M_COMPOSITECONSTRUCTU32X2(...) \
|
||||||
|
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
|
||||||
|
#define M_COMPOSITECONSTRUCTU32X4(...) \
|
||||||
|
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
||||||
|
|
||||||
|
} // namespace Shader::Optimiation::PatternMatching
|
@ -40,7 +40,8 @@ union TextureInstInfo {
|
|||||||
BitField<6, 2, u32> gather_comp;
|
BitField<6, 2, u32> gather_comp;
|
||||||
BitField<8, 1, u32> has_derivatives;
|
BitField<8, 1, u32> has_derivatives;
|
||||||
BitField<9, 1, u32> is_array;
|
BitField<9, 1, u32> is_array;
|
||||||
BitField<10, 1, u32> is_gather;
|
BitField<10, 1, u32> is_unnormalized;
|
||||||
|
BitField<11, 1, u32> is_gather;
|
||||||
};
|
};
|
||||||
|
|
||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
@ -48,7 +49,8 @@ union BufferInstInfo {
|
|||||||
BitField<0, 1, u32> index_enable;
|
BitField<0, 1, u32> index_enable;
|
||||||
BitField<1, 1, u32> offset_enable;
|
BitField<1, 1, u32> offset_enable;
|
||||||
BitField<2, 12, u32> inst_offset;
|
BitField<2, 12, u32> inst_offset;
|
||||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
BitField<14, 1, u32> system_coherent;
|
||||||
|
BitField<15, 1, u32> globally_coherent;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class ScalarReg : u32 {
|
enum class ScalarReg : u32 {
|
||||||
|
@ -15,7 +15,7 @@ enum class Type {
|
|||||||
ScalarReg = 1 << 1,
|
ScalarReg = 1 << 1,
|
||||||
VectorReg = 1 << 2,
|
VectorReg = 1 << 2,
|
||||||
Attribute = 1 << 3,
|
Attribute = 1 << 3,
|
||||||
SystemValue = 1 << 4,
|
Patch = 1 << 4,
|
||||||
U1 = 1 << 5,
|
U1 = 1 << 5,
|
||||||
U8 = 1 << 6,
|
U8 = 1 << 6,
|
||||||
U16 = 1 << 7,
|
U16 = 1 << 7,
|
||||||
|
@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
|
|||||||
|
|
||||||
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
|
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
|
||||||
|
|
||||||
|
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
|
||||||
|
|
||||||
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
|
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
|
||||||
|
|
||||||
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
|
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "shader_recompiler/exception.h"
|
#include "shader_recompiler/exception.h"
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
|
#include "shader_recompiler/ir/patch.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/ir/type.h"
|
#include "shader_recompiler/ir/type.h"
|
||||||
|
|
||||||
@ -34,6 +35,7 @@ public:
|
|||||||
explicit Value(IR::ScalarReg reg) noexcept;
|
explicit Value(IR::ScalarReg reg) noexcept;
|
||||||
explicit Value(IR::VectorReg reg) noexcept;
|
explicit Value(IR::VectorReg reg) noexcept;
|
||||||
explicit Value(IR::Attribute value) noexcept;
|
explicit Value(IR::Attribute value) noexcept;
|
||||||
|
explicit Value(IR::Patch patch) noexcept;
|
||||||
explicit Value(bool value) noexcept;
|
explicit Value(bool value) noexcept;
|
||||||
explicit Value(u8 value) noexcept;
|
explicit Value(u8 value) noexcept;
|
||||||
explicit Value(u16 value) noexcept;
|
explicit Value(u16 value) noexcept;
|
||||||
@ -56,6 +58,7 @@ public:
|
|||||||
[[nodiscard]] IR::ScalarReg ScalarReg() const;
|
[[nodiscard]] IR::ScalarReg ScalarReg() const;
|
||||||
[[nodiscard]] IR::VectorReg VectorReg() const;
|
[[nodiscard]] IR::VectorReg VectorReg() const;
|
||||||
[[nodiscard]] IR::Attribute Attribute() const;
|
[[nodiscard]] IR::Attribute Attribute() const;
|
||||||
|
[[nodiscard]] IR::Patch Patch() const;
|
||||||
[[nodiscard]] bool U1() const;
|
[[nodiscard]] bool U1() const;
|
||||||
[[nodiscard]] u8 U8() const;
|
[[nodiscard]] u8 U8() const;
|
||||||
[[nodiscard]] u16 U16() const;
|
[[nodiscard]] u16 U16() const;
|
||||||
@ -75,6 +78,7 @@ private:
|
|||||||
IR::ScalarReg sreg;
|
IR::ScalarReg sreg;
|
||||||
IR::VectorReg vreg;
|
IR::VectorReg vreg;
|
||||||
IR::Attribute attribute;
|
IR::Attribute attribute;
|
||||||
|
IR::Patch patch;
|
||||||
bool imm_u1;
|
bool imm_u1;
|
||||||
u8 imm_u8;
|
u8 imm_u8;
|
||||||
u16 imm_u16;
|
u16 imm_u16;
|
||||||
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
|
|||||||
return attribute;
|
return attribute;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline IR::Patch Value::Patch() const {
|
||||||
|
DEBUG_ASSERT(type == Type::Patch);
|
||||||
|
return patch;
|
||||||
|
}
|
||||||
|
|
||||||
inline bool Value::U1() const {
|
inline bool Value::U1() const {
|
||||||
if (IsIdentity()) {
|
if (IsIdentity()) {
|
||||||
return inst->Arg(0).U1();
|
return inst->Arg(0).U1();
|
||||||
|
@ -23,9 +23,11 @@ struct Profile {
|
|||||||
bool support_fp32_denorm_flush{};
|
bool support_fp32_denorm_flush{};
|
||||||
bool support_explicit_workgroup_layout{};
|
bool support_explicit_workgroup_layout{};
|
||||||
bool support_legacy_vertex_attributes{};
|
bool support_legacy_vertex_attributes{};
|
||||||
|
bool supports_image_load_store_lod{};
|
||||||
bool has_broken_spirv_clamp{};
|
bool has_broken_spirv_clamp{};
|
||||||
bool lower_left_origin_mode{};
|
bool lower_left_origin_mode{};
|
||||||
bool needs_manual_interpolation{};
|
bool needs_manual_interpolation{};
|
||||||
|
bool needs_lds_barriers{};
|
||||||
u64 min_ssbo_alignment{};
|
u64 min_ssbo_alignment{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/config.h"
|
||||||
|
#include "common/io_file.h"
|
||||||
|
#include "common/path_util.h"
|
||||||
#include "shader_recompiler/frontend/control_flow_graph.h"
|
#include "shader_recompiler/frontend/control_flow_graph.h"
|
||||||
#include "shader_recompiler/frontend/decode.h"
|
#include "shader_recompiler/frontend/decode.h"
|
||||||
#include "shader_recompiler/frontend/structured_control_flow.h"
|
#include "shader_recompiler/frontend/structured_control_flow.h"
|
||||||
@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
RuntimeInfo& runtime_info, const Profile& profile) {
|
||||||
// Ensure first instruction is expected.
|
// Ensure first instruction is expected.
|
||||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||||
if (code[0] != token_mov_vcchi) {
|
if (code[0] != token_mov_vcchi) {
|
||||||
@ -60,17 +63,35 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
|||||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||||
|
|
||||||
// Run optimization passes
|
// Run optimization passes
|
||||||
|
const auto stage = program.info.stage;
|
||||||
|
|
||||||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||||
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
|
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||||
|
// Tess passes require previous const prop passes for now (for simplicity). TODO allow
|
||||||
|
// fine grained folding or opportunistic folding when we set an operand to an immediate
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
if (program.info.stage != Stage::Compute) {
|
Shader::Optimization::TessellationPreprocess(program, runtime_info);
|
||||||
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
|
Shader::Optimization::HullShaderTransform(program, runtime_info);
|
||||||
|
} else if (info.l_stage == LogicalStage::TessellationEval) {
|
||||||
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
|
Shader::Optimization::TessellationPreprocess(program, runtime_info);
|
||||||
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
|
Shader::Optimization::DomainShaderTransform(program, runtime_info);
|
||||||
|
}
|
||||||
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
|
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
||||||
|
if (stage != Stage::Compute) {
|
||||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||||
}
|
}
|
||||||
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
Shader::Optimization::FlattenExtendedUserdataPass(program);
|
Shader::Optimization::FlattenExtendedUserdataPass(program);
|
||||||
Shader::Optimization::ResourceTrackingPass(program);
|
Shader::Optimization::ResourceTrackingPass(program);
|
||||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||||
Shader::Optimization::CollectShaderInfoPass(program);
|
Shader::Optimization::CollectShaderInfoPass(program);
|
||||||
|
Shader::Optimization::SharedMemoryBarrierPass(program, profile);
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,6 @@ struct Pools {
|
|||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||||
const RuntimeInfo& runtime_info, const Profile& profile);
|
RuntimeInfo& runtime_info, const Profile& profile);
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <span>
|
#include <span>
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "shader_recompiler/frontend/tessellation.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/types.h"
|
||||||
|
|
||||||
@ -21,12 +22,31 @@ enum class Stage : u32 {
|
|||||||
Local,
|
Local,
|
||||||
Compute,
|
Compute,
|
||||||
};
|
};
|
||||||
constexpr u32 MaxStageTypes = 7;
|
|
||||||
|
// Vertex intentionally comes after TCS/TES due to order of compilation
|
||||||
|
enum class LogicalStage : u32 {
|
||||||
|
Fragment,
|
||||||
|
TessellationControl,
|
||||||
|
TessellationEval,
|
||||||
|
Vertex,
|
||||||
|
Geometry,
|
||||||
|
Compute,
|
||||||
|
NumLogicalStages
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
|
||||||
|
|
||||||
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||||
return static_cast<Stage>(index);
|
return static_cast<Stage>(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct LocalRuntimeInfo {
|
||||||
|
u32 ls_stride;
|
||||||
|
bool links_with_tcs;
|
||||||
|
|
||||||
|
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
|
||||||
|
};
|
||||||
|
|
||||||
struct ExportRuntimeInfo {
|
struct ExportRuntimeInfo {
|
||||||
u32 vertex_data_size;
|
u32 vertex_data_size;
|
||||||
|
|
||||||
@ -64,9 +84,57 @@ struct VertexRuntimeInfo {
|
|||||||
u32 num_outputs;
|
u32 num_outputs;
|
||||||
std::array<VsOutputMap, 3> outputs;
|
std::array<VsOutputMap, 3> outputs;
|
||||||
bool emulate_depth_negative_one_to_one{};
|
bool emulate_depth_negative_one_to_one{};
|
||||||
|
// Domain
|
||||||
|
AmdGpu::TessellationType tess_type;
|
||||||
|
AmdGpu::TessellationTopology tess_topology;
|
||||||
|
AmdGpu::TessellationPartitioning tess_partitioning;
|
||||||
|
u32 hs_output_cp_stride{};
|
||||||
|
|
||||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||||
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
|
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
|
||||||
|
tess_type == other.tess_type && tess_topology == other.tess_topology &&
|
||||||
|
tess_partitioning == other.tess_partitioning &&
|
||||||
|
hs_output_cp_stride == other.hs_output_cp_stride;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||||
|
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct HullRuntimeInfo {
|
||||||
|
// from registers
|
||||||
|
u32 num_input_control_points;
|
||||||
|
u32 num_threads;
|
||||||
|
AmdGpu::TessellationType tess_type;
|
||||||
|
|
||||||
|
// from tess constants buffer
|
||||||
|
u32 ls_stride;
|
||||||
|
u32 hs_output_cp_stride;
|
||||||
|
u32 hs_output_base;
|
||||||
|
|
||||||
|
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
|
||||||
|
|
||||||
|
// It might be possible for a non-passthrough TCS to have these conditions, in some
|
||||||
|
// dumb situation.
|
||||||
|
// In that case, it should be fine to assume passthrough and declare some extra
|
||||||
|
// output control points and attributes that shouldn't be read by the TES anyway
|
||||||
|
bool IsPassthrough() const {
|
||||||
|
return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
|
||||||
|
};
|
||||||
|
|
||||||
|
// regs.ls_hs_config.hs_output_control_points contains the number of threads, which
|
||||||
|
// isn't exactly the number of output control points.
|
||||||
|
// For passthrough shaders, the register field is set to 1, so use the number of
|
||||||
|
// input control points
|
||||||
|
u32 NumOutputControlPoints() const {
|
||||||
|
return IsPassthrough() ? num_input_control_points : num_threads;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||||
|
ls_stride = tess_constants.ls_stride;
|
||||||
|
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||||
|
hs_output_base = tess_constants.hs_output_base;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -150,8 +218,10 @@ struct RuntimeInfo {
|
|||||||
AmdGpu::FpDenormMode fp_denorm_mode32;
|
AmdGpu::FpDenormMode fp_denorm_mode32;
|
||||||
AmdGpu::FpRoundMode fp_round_mode32;
|
AmdGpu::FpRoundMode fp_round_mode32;
|
||||||
union {
|
union {
|
||||||
|
LocalRuntimeInfo ls_info;
|
||||||
ExportRuntimeInfo es_info;
|
ExportRuntimeInfo es_info;
|
||||||
VertexRuntimeInfo vs_info;
|
VertexRuntimeInfo vs_info;
|
||||||
|
HullRuntimeInfo hs_info;
|
||||||
GeometryRuntimeInfo gs_info;
|
GeometryRuntimeInfo gs_info;
|
||||||
FragmentRuntimeInfo fs_info;
|
FragmentRuntimeInfo fs_info;
|
||||||
ComputeRuntimeInfo cs_info;
|
ComputeRuntimeInfo cs_info;
|
||||||
@ -174,6 +244,10 @@ struct RuntimeInfo {
|
|||||||
return es_info == other.es_info;
|
return es_info == other.es_info;
|
||||||
case Stage::Geometry:
|
case Stage::Geometry:
|
||||||
return gs_info == other.gs_info;
|
return gs_info == other.gs_info;
|
||||||
|
case Stage::Hull:
|
||||||
|
return hs_info == other.hs_info;
|
||||||
|
case Stage::Local:
|
||||||
|
return ls_info == other.ls_info;
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@ struct BufferSpecialization {
|
|||||||
|
|
||||||
struct TextureBufferSpecialization {
|
struct TextureBufferSpecialization {
|
||||||
bool is_integer = false;
|
bool is_integer = false;
|
||||||
|
u32 dst_select = 0;
|
||||||
|
|
||||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||||
};
|
};
|
||||||
@ -38,8 +39,12 @@ struct TextureBufferSpecialization {
|
|||||||
struct ImageSpecialization {
|
struct ImageSpecialization {
|
||||||
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
|
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
|
||||||
bool is_integer = false;
|
bool is_integer = false;
|
||||||
|
u32 dst_select = 0;
|
||||||
|
|
||||||
auto operator<=>(const ImageSpecialization&) const = default;
|
bool operator==(const ImageSpecialization& other) const {
|
||||||
|
return type == other.type && is_integer == other.is_integer &&
|
||||||
|
(dst_select != 0 ? dst_select == other.dst_select : true);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FMaskSpecialization {
|
struct FMaskSpecialization {
|
||||||
@ -49,6 +54,12 @@ struct FMaskSpecialization {
|
|||||||
auto operator<=>(const FMaskSpecialization&) const = default;
|
auto operator<=>(const FMaskSpecialization&) const = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct SamplerSpecialization {
|
||||||
|
bool force_unnormalized = false;
|
||||||
|
|
||||||
|
auto operator<=>(const SamplerSpecialization&) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Alongside runtime information, this structure also checks bound resources
|
* Alongside runtime information, this structure also checks bound resources
|
||||||
* for compatibility. Can be used as a key for storing shader permutations.
|
* for compatibility. Can be used as a key for storing shader permutations.
|
||||||
@ -67,6 +78,7 @@ struct StageSpecialization {
|
|||||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||||
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
||||||
|
boost::container::small_vector<SamplerSpecialization, 16> samplers;
|
||||||
Backend::Bindings start{};
|
Backend::Bindings start{};
|
||||||
|
|
||||||
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
|
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
|
||||||
@ -96,17 +108,37 @@ struct StageSpecialization {
|
|||||||
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||||
|
spec.dst_select = sharp.DstSelect();
|
||||||
});
|
});
|
||||||
ForEachSharp(binding, images, info->images,
|
ForEachSharp(binding, images, info->images,
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||||
spec.type = sharp.GetBoundType();
|
spec.type = sharp.GetBoundType();
|
||||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||||
|
if (desc.is_storage) {
|
||||||
|
spec.dst_select = sharp.DstSelect();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
ForEachSharp(binding, fmasks, info->fmasks,
|
ForEachSharp(binding, fmasks, info->fmasks,
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||||
spec.width = sharp.width;
|
spec.width = sharp.width;
|
||||||
spec.height = sharp.height;
|
spec.height = sharp.height;
|
||||||
});
|
});
|
||||||
|
ForEachSharp(samplers, info->samplers,
|
||||||
|
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
|
||||||
|
spec.force_unnormalized = sharp.force_unnormalized;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Initialize runtime_info fields that rely on analysis in tessellation passes
|
||||||
|
if (info->l_stage == LogicalStage::TessellationControl ||
|
||||||
|
info->l_stage == LogicalStage::TessellationEval) {
|
||||||
|
Shader::TessellationDataConstantBuffer tess_constants;
|
||||||
|
info->ReadTessConstantBuffer(tess_constants);
|
||||||
|
if (info->l_stage == LogicalStage::TessellationControl) {
|
||||||
|
runtime_info.hs_info.InitFromTessConstants(tess_constants);
|
||||||
|
} else {
|
||||||
|
runtime_info.vs_info.InitFromTessConstants(tess_constants);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
@ -175,6 +207,11 @@ struct StageSpecialization {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (u32 i = 0; i < samplers.size(); i++) {
|
||||||
|
if (samplers[i] != other.samplers[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <boost/preprocessor/stringize.hpp>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
@ -18,7 +20,32 @@ namespace AmdGpu {
|
|||||||
|
|
||||||
static const char* dcb_task_name{"DCB_TASK"};
|
static const char* dcb_task_name{"DCB_TASK"};
|
||||||
static const char* ccb_task_name{"CCB_TASK"};
|
static const char* ccb_task_name{"CCB_TASK"};
|
||||||
static const char* acb_task_name{"ACB_TASK"};
|
|
||||||
|
#define MAX_NAMES 56
|
||||||
|
static_assert(Liverpool::NumComputeRings <= MAX_NAMES);
|
||||||
|
|
||||||
|
#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n),
|
||||||
|
#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)}
|
||||||
|
|
||||||
|
static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
|
||||||
|
|
||||||
|
#define YIELD(name) \
|
||||||
|
FIBER_EXIT; \
|
||||||
|
co_yield {}; \
|
||||||
|
FIBER_ENTER(name);
|
||||||
|
|
||||||
|
#define YIELD_CE() YIELD(ccb_task_name)
|
||||||
|
#define YIELD_GFX() YIELD(dcb_task_name)
|
||||||
|
#define YIELD_ASC(id) YIELD(acb_task_name[id])
|
||||||
|
|
||||||
|
#define RESUME(task, name) \
|
||||||
|
FIBER_EXIT; \
|
||||||
|
task.handle.resume(); \
|
||||||
|
FIBER_ENTER(name);
|
||||||
|
|
||||||
|
#define RESUME_CE(task) RESUME(task, ccb_task_name)
|
||||||
|
#define RESUME_GFX(task) RESUME(task, dcb_task_name)
|
||||||
|
#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id])
|
||||||
|
|
||||||
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
||||||
|
|
||||||
@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||||||
|
|
||||||
VideoCore::StartCapture();
|
VideoCore::StartCapture();
|
||||||
|
|
||||||
int qid = -1;
|
curr_qid = -1;
|
||||||
|
|
||||||
while (num_submits || num_commands) {
|
while (num_submits || num_commands) {
|
||||||
|
|
||||||
@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||||||
--num_commands;
|
--num_commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
qid = (qid + 1) % NumTotalQueues;
|
curr_qid = (curr_qid + 1) % num_mapped_queues;
|
||||||
|
|
||||||
auto& queue = mapped_queues[qid];
|
auto& queue = mapped_queues[curr_qid];
|
||||||
|
|
||||||
Task::Handle task{};
|
Task::Handle task{};
|
||||||
{
|
{
|
||||||
@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||||
TracyFiberEnter(ccb_task_name);
|
FIBER_ENTER(ccb_task_name);
|
||||||
|
|
||||||
while (!ccb.empty()) {
|
while (!ccb.empty()) {
|
||||||
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
|
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
|
||||||
@ -155,9 +182,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
|||||||
case PM4ItOpcode::WaitOnDeCounterDiff: {
|
case PM4ItOpcode::WaitOnDeCounterDiff: {
|
||||||
const auto diff = it_body[0];
|
const auto diff = it_body[0];
|
||||||
while ((cblock.de_count - cblock.ce_count) >= diff) {
|
while ((cblock.de_count - cblock.ce_count) >= diff) {
|
||||||
TracyFiberLeave;
|
YIELD_CE();
|
||||||
co_yield {};
|
|
||||||
TracyFiberEnter(ccb_task_name);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
|||||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||||
auto task =
|
auto task =
|
||||||
ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
|
ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
|
||||||
while (!task.handle.done()) {
|
RESUME_CE(task);
|
||||||
task.handle.resume();
|
|
||||||
|
|
||||||
TracyFiberLeave;
|
while (!task.handle.done()) {
|
||||||
co_yield {};
|
YIELD_CE();
|
||||||
TracyFiberEnter(ccb_task_name);
|
RESUME_CE(task);
|
||||||
};
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
|||||||
ccb = NextPacket(ccb, header->type3.NumWords() + 1);
|
ccb = NextPacket(ccb, header->type3.NumWords() + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
TracyFiberLeave;
|
FIBER_EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
|
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||||
TracyFiberEnter(dcb_task_name);
|
FIBER_ENTER(dcb_task_name);
|
||||||
|
|
||||||
cblock.Reset();
|
cblock.Reset();
|
||||||
|
|
||||||
@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
if (!ccb.empty()) {
|
if (!ccb.empty()) {
|
||||||
// In case of CCB provided kick off CE asap to have the constant heap ready to use
|
// In case of CCB provided kick off CE asap to have the constant heap ready to use
|
||||||
ce_task = ProcessCeUpdate(ccb);
|
ce_task = ProcessCeUpdate(ccb);
|
||||||
TracyFiberLeave;
|
RESUME_GFX(ce_task);
|
||||||
ce_task.handle.resume();
|
|
||||||
TracyFiberEnter(dcb_task_name);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
|
const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
|
||||||
@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
}
|
}
|
||||||
case PM4ItOpcode::SetShReg: {
|
case PM4ItOpcode::SetShReg: {
|
||||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||||
|
const auto set_size = (count - 1) * sizeof(u32);
|
||||||
|
|
||||||
|
if (set_data->reg_offset >= 0x200 &&
|
||||||
|
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||||
|
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||||
|
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
|
||||||
|
(set_data->reg_offset - 0x200);
|
||||||
|
std::memcpy(addr, header + 2, set_size);
|
||||||
|
} else {
|
||||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||||
(count - 1) * sizeof(u32));
|
set_size);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetUconfigReg: {
|
case PM4ItOpcode::SetUconfigReg: {
|
||||||
@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
}
|
}
|
||||||
case PM4ItOpcode::DispatchDirect: {
|
case PM4ItOpcode::DispatchDirect: {
|
||||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
auto& cs_program = GetCsRegs();
|
||||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
cs_program.dim_x = dispatch_direct->dim_x;
|
||||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
cs_program.dim_y = dispatch_direct->dim_y;
|
||||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
cs_program.dim_z = dispatch_direct->dim_z;
|
||||||
|
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||||
if (DebugState.DumpingCurrentReg()) {
|
if (DebugState.DumpingCurrentReg()) {
|
||||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
|
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||||
true);
|
cs_program);
|
||||||
}
|
}
|
||||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
||||||
rasterizer->DispatchDirect();
|
rasterizer->DispatchDirect();
|
||||||
@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
case PM4ItOpcode::DispatchIndirect: {
|
case PM4ItOpcode::DispatchIndirect: {
|
||||||
const auto* dispatch_indirect =
|
const auto* dispatch_indirect =
|
||||||
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||||
|
auto& cs_program = GetCsRegs();
|
||||||
const auto offset = dispatch_indirect->data_offset;
|
const auto offset = dispatch_indirect->data_offset;
|
||||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||||
if (DebugState.DumpingCurrentReg()) {
|
if (DebugState.DumpingCurrentReg()) {
|
||||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
|
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||||
true);
|
cs_program);
|
||||||
}
|
}
|
||||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||||
rasterizer->ScopeMarkerBegin(
|
rasterizer->ScopeMarkerBegin(
|
||||||
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
||||||
@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
case PM4ItOpcode::Rewind: {
|
case PM4ItOpcode::Rewind: {
|
||||||
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
||||||
while (!rewind->Valid()) {
|
while (!rewind->Valid()) {
|
||||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
YIELD_GFX();
|
||||||
TracyFiberLeave;
|
|
||||||
co_yield {};
|
|
||||||
TracyFiberEnter(dcb_task_name);
|
|
||||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -629,15 +659,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
// there are no other submits to yield to we can sleep the thread
|
// there are no other submits to yield to we can sleep the thread
|
||||||
// instead and allow other tasks to run.
|
// instead and allow other tasks to run.
|
||||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
if (vo_port->IsVoLabel(wait_addr) &&
|
||||||
|
num_submits == mapped_queues[GfxQueueId].submits.size()) {
|
||||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||||
}
|
}
|
||||||
while (!wait_reg_mem->Test()) {
|
while (!wait_reg_mem->Test()) {
|
||||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
YIELD_GFX();
|
||||||
TracyFiberLeave;
|
|
||||||
co_yield {};
|
|
||||||
TracyFiberEnter(dcb_task_name);
|
|
||||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -645,13 +672,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||||
auto task = ProcessGraphics(
|
auto task = ProcessGraphics(
|
||||||
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
|
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
|
||||||
while (!task.handle.done()) {
|
RESUME_GFX(task);
|
||||||
task.handle.resume();
|
|
||||||
|
|
||||||
TracyFiberLeave;
|
while (!task.handle.done()) {
|
||||||
co_yield {};
|
YIELD_GFX();
|
||||||
TracyFiberEnter(dcb_task_name);
|
RESUME_GFX(task);
|
||||||
};
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::IncrementDeCounter: {
|
case PM4ItOpcode::IncrementDeCounter: {
|
||||||
@ -660,9 +686,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
}
|
}
|
||||||
case PM4ItOpcode::WaitOnCeCounter: {
|
case PM4ItOpcode::WaitOnCeCounter: {
|
||||||
while (cblock.ce_count <= cblock.de_count) {
|
while (cblock.ce_count <= cblock.de_count) {
|
||||||
TracyFiberLeave;
|
RESUME_GFX(ce_task);
|
||||||
ce_task.handle.resume();
|
|
||||||
TracyFiberEnter(dcb_task_name);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -686,11 +710,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
ce_task.handle.destroy();
|
ce_task.handle.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
TracyFiberLeave;
|
FIBER_EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
template <bool is_indirect>
|
||||||
TracyFiberEnter(acb_task_name);
|
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||||
|
FIBER_ENTER(acb_task_name[vqid]);
|
||||||
|
const auto& queue = asc_queues[{vqid}];
|
||||||
|
|
||||||
auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
|
auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
|
||||||
while (!acb.empty()) {
|
while (!acb.empty()) {
|
||||||
@ -711,15 +737,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
}
|
}
|
||||||
case PM4ItOpcode::IndirectBuffer: {
|
case PM4ItOpcode::IndirectBuffer: {
|
||||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||||
auto task = ProcessCompute(
|
auto task = ProcessCompute<true>(
|
||||||
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
|
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
|
||||||
while (!task.handle.done()) {
|
RESUME_ASC(task, vqid);
|
||||||
task.handle.resume();
|
|
||||||
|
|
||||||
TracyFiberLeave;
|
while (!task.handle.done()) {
|
||||||
co_yield {};
|
YIELD_ASC(vqid);
|
||||||
TracyFiberEnter(acb_task_name);
|
RESUME_ASC(task, vqid);
|
||||||
};
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::DmaData: {
|
case PM4ItOpcode::DmaData: {
|
||||||
@ -757,30 +782,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
case PM4ItOpcode::Rewind: {
|
case PM4ItOpcode::Rewind: {
|
||||||
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
||||||
while (!rewind->Valid()) {
|
while (!rewind->Valid()) {
|
||||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
YIELD_ASC(vqid);
|
||||||
TracyFiberLeave;
|
|
||||||
co_yield {};
|
|
||||||
TracyFiberEnter(acb_task_name);
|
|
||||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetShReg: {
|
case PM4ItOpcode::SetShReg: {
|
||||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||||
|
const auto set_size = (count - 1) * sizeof(u32);
|
||||||
|
|
||||||
|
if (set_data->reg_offset >= 0x200 &&
|
||||||
|
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||||
|
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||||
|
auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
|
||||||
|
(set_data->reg_offset - 0x200);
|
||||||
|
std::memcpy(addr, header + 2, set_size);
|
||||||
|
} else {
|
||||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||||
(count - 1) * sizeof(u32));
|
set_size);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::DispatchDirect: {
|
case PM4ItOpcode::DispatchDirect: {
|
||||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
auto& cs_program = GetCsRegs();
|
||||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
cs_program.dim_x = dispatch_direct->dim_x;
|
||||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
cs_program.dim_y = dispatch_direct->dim_y;
|
||||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
cs_program.dim_z = dispatch_direct->dim_z;
|
||||||
|
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||||
if (DebugState.DumpingCurrentReg()) {
|
if (DebugState.DumpingCurrentReg()) {
|
||||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs, true);
|
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||||
|
cs_program);
|
||||||
}
|
}
|
||||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||||
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
||||||
rasterizer->DispatchDirect();
|
rasterizer->DispatchDirect();
|
||||||
@ -803,17 +836,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||||
while (!wait_reg_mem->Test()) {
|
while (!wait_reg_mem->Test()) {
|
||||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
YIELD_ASC(vqid);
|
||||||
TracyFiberLeave;
|
|
||||||
co_yield {};
|
|
||||||
TracyFiberEnter(acb_task_name);
|
|
||||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::ReleaseMem: {
|
case PM4ItOpcode::ReleaseMem: {
|
||||||
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
||||||
release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
|
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -821,10 +850,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
static_cast<u32>(opcode), count);
|
static_cast<u32>(opcode), count);
|
||||||
}
|
}
|
||||||
|
|
||||||
acb = NextPacket(acb, header->type3.NumWords() + 1);
|
const auto packet_size_dw = header->type3.NumWords() + 1;
|
||||||
|
acb = NextPacket(acb, packet_size_dw);
|
||||||
|
|
||||||
|
if constexpr (!is_indirect) {
|
||||||
|
*queue.read_addr += packet_size_dw;
|
||||||
|
*queue.read_addr %= queue.ring_size_dw;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TracyFiberLeave;
|
FIBER_EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
|
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
|
||||||
@ -881,10 +916,11 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
|||||||
submit_cv.notify_one();
|
submit_cv.notify_one();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
|
||||||
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||||
auto& queue = mapped_queues[vqid];
|
auto& queue = mapped_queues[gnm_vqid];
|
||||||
|
|
||||||
|
const auto vqid = gnm_vqid - 1;
|
||||||
const auto& task = ProcessCompute(acb, vqid);
|
const auto& task = ProcessCompute(acb, vqid);
|
||||||
{
|
{
|
||||||
std::scoped_lock lock{queue.m_access};
|
std::scoped_lock lock{queue.m_access};
|
||||||
@ -892,6 +928,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::scoped_lock lk{submit_mutex};
|
std::scoped_lock lk{submit_mutex};
|
||||||
|
num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
|
||||||
++num_submits;
|
++num_submits;
|
||||||
submit_cv.notify_one();
|
submit_cv.notify_one();
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
|
#include "common/slot_vector.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "common/unique_function.h"
|
#include "common/unique_function.h"
|
||||||
#include "shader_recompiler/params.h"
|
#include "shader_recompiler/params.h"
|
||||||
@ -45,7 +46,8 @@ struct Liverpool {
|
|||||||
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
||||||
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
||||||
static constexpr u32 NumQueuesPerPipe = 8u;
|
static constexpr u32 NumQueuesPerPipe = 8u;
|
||||||
static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe);
|
static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe;
|
||||||
|
static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings;
|
||||||
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
|
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
|
||||||
|
|
||||||
static constexpr u32 NumColorBuffers = 8;
|
static constexpr u32 NumColorBuffers = 8;
|
||||||
@ -143,6 +145,13 @@ struct Liverpool {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct HsTessFactorClamp {
|
||||||
|
// I've only seen min=0.0, max=1.0 so far.
|
||||||
|
// TODO why is max set to 1.0? Makes no sense
|
||||||
|
float hs_max_tess;
|
||||||
|
float hs_min_tess;
|
||||||
|
};
|
||||||
|
|
||||||
struct ComputeProgram {
|
struct ComputeProgram {
|
||||||
u32 dispatch_initiator;
|
u32 dispatch_initiator;
|
||||||
u32 dim_x;
|
u32 dim_x;
|
||||||
@ -431,6 +440,10 @@ struct Liverpool {
|
|||||||
return u64(z_read_base) << 8;
|
return u64(z_read_base) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 StencilAddress() const {
|
||||||
|
return u64(stencil_read_base) << 8;
|
||||||
|
}
|
||||||
|
|
||||||
u32 NumSamples() const {
|
u32 NumSamples() const {
|
||||||
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
||||||
}
|
}
|
||||||
@ -952,6 +965,7 @@ struct Liverpool {
|
|||||||
enum VgtStages : u32 {
|
enum VgtStages : u32 {
|
||||||
Vs = 0u, // always enabled
|
Vs = 0u, // always enabled
|
||||||
EsGs = 0xB0u,
|
EsGs = 0xB0u,
|
||||||
|
LsHs = 0x45u,
|
||||||
};
|
};
|
||||||
|
|
||||||
VgtStages raw;
|
VgtStages raw;
|
||||||
@ -959,7 +973,8 @@ struct Liverpool {
|
|||||||
BitField<2, 1, u32> hs_en;
|
BitField<2, 1, u32> hs_en;
|
||||||
BitField<3, 2, u32> es_en;
|
BitField<3, 2, u32> es_en;
|
||||||
BitField<5, 1, u32> gs_en;
|
BitField<5, 1, u32> gs_en;
|
||||||
BitField<6, 1, u32> vs_en;
|
BitField<6, 2, u32> vs_en;
|
||||||
|
BitField<8, 1, u32> dynamic_hs;
|
||||||
|
|
||||||
bool IsStageEnabled(u32 stage) const {
|
bool IsStageEnabled(u32 stage) const {
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
@ -1055,6 +1070,28 @@ struct Liverpool {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
union LsHsConfig {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 8, u32> num_patches;
|
||||||
|
BitField<8, 6, u32> hs_input_control_points;
|
||||||
|
BitField<14, 6, u32> hs_output_control_points;
|
||||||
|
};
|
||||||
|
|
||||||
|
union TessellationConfig {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 2, TessellationType> type;
|
||||||
|
BitField<2, 3, TessellationPartitioning> partitioning;
|
||||||
|
BitField<5, 3, TessellationTopology> topology;
|
||||||
|
};
|
||||||
|
|
||||||
|
union TessFactorMemoryBase {
|
||||||
|
u32 base;
|
||||||
|
|
||||||
|
u64 MemoryBase() const {
|
||||||
|
return static_cast<u64>(base) << 8;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
union Eqaa {
|
union Eqaa {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 1, u32> max_anchor_samples;
|
BitField<0, 1, u32> max_anchor_samples;
|
||||||
@ -1105,10 +1142,10 @@ struct Liverpool {
|
|||||||
ShaderProgram es_program;
|
ShaderProgram es_program;
|
||||||
INSERT_PADDING_WORDS(0x2C);
|
INSERT_PADDING_WORDS(0x2C);
|
||||||
ShaderProgram hs_program;
|
ShaderProgram hs_program;
|
||||||
INSERT_PADDING_WORDS(0x2C);
|
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
|
||||||
ShaderProgram ls_program;
|
ShaderProgram ls_program;
|
||||||
INSERT_PADDING_WORDS(0xA4);
|
INSERT_PADDING_WORDS(0xA4);
|
||||||
ComputeProgram cs_program;
|
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
|
||||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
||||||
DepthRenderControl depth_render_control;
|
DepthRenderControl depth_render_control;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
@ -1172,7 +1209,9 @@ struct Liverpool {
|
|||||||
PolygonControl polygon_control;
|
PolygonControl polygon_control;
|
||||||
ViewportControl viewport_control;
|
ViewportControl viewport_control;
|
||||||
VsOutputControl vs_output_control;
|
VsOutputControl vs_output_control;
|
||||||
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
|
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
|
||||||
|
HsTessFactorClamp hs_clamp;
|
||||||
|
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
|
||||||
GsMode vgt_gs_mode;
|
GsMode vgt_gs_mode;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
ModeControl mode_control;
|
ModeControl mode_control;
|
||||||
@ -1196,9 +1235,10 @@ struct Liverpool {
|
|||||||
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
||||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
||||||
ShaderStageEnable stage_enable;
|
ShaderStageEnable stage_enable;
|
||||||
INSERT_PADDING_WORDS(1);
|
LsHsConfig ls_hs_config;
|
||||||
u32 vgt_gs_vert_itemsize[4];
|
u32 vgt_gs_vert_itemsize[4];
|
||||||
INSERT_PADDING_WORDS(4);
|
TessellationConfig tess_config;
|
||||||
|
INSERT_PADDING_WORDS(3);
|
||||||
PolygonOffset poly_offset;
|
PolygonOffset poly_offset;
|
||||||
GsInstances vgt_gs_instance_cnt;
|
GsInstances vgt_gs_instance_cnt;
|
||||||
StreamOutConfig vgt_strmout_config;
|
StreamOutConfig vgt_strmout_config;
|
||||||
@ -1212,6 +1252,8 @@ struct Liverpool {
|
|||||||
INSERT_PADDING_WORDS(0xC24C - 0xC243);
|
INSERT_PADDING_WORDS(0xC24C - 0xC243);
|
||||||
u32 num_indices;
|
u32 num_indices;
|
||||||
VgtNumInstances num_instances;
|
VgtNumInstances num_instances;
|
||||||
|
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
|
||||||
|
TessFactorMemoryBase vgt_tf_memory_base;
|
||||||
};
|
};
|
||||||
std::array<u32, NumRegs> reg_array{};
|
std::array<u32, NumRegs> reg_array{};
|
||||||
|
|
||||||
@ -1258,7 +1300,7 @@ public:
|
|||||||
~Liverpool();
|
~Liverpool();
|
||||||
|
|
||||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||||
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
|
||||||
|
|
||||||
void SubmitDone() noexcept {
|
void SubmitDone() noexcept {
|
||||||
std::scoped_lock lk{submit_mutex};
|
std::scoped_lock lk{submit_mutex};
|
||||||
@ -1301,6 +1343,18 @@ public:
|
|||||||
gfx_queue.dcb_buffer.reserve(GfxReservedSize);
|
gfx_queue.dcb_buffer.reserve(GfxReservedSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline ComputeProgram& GetCsRegs() {
|
||||||
|
return mapped_queues[curr_qid].cs_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct AscQueueInfo {
|
||||||
|
VAddr map_addr;
|
||||||
|
u32* read_addr;
|
||||||
|
u32 ring_size_dw;
|
||||||
|
u32 pipe_id;
|
||||||
|
};
|
||||||
|
Common::SlotVector<AscQueueInfo> asc_queues{};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Task {
|
struct Task {
|
||||||
struct promise_type {
|
struct promise_type {
|
||||||
@ -1338,7 +1392,8 @@ private:
|
|||||||
std::span<const u32> ccb);
|
std::span<const u32> ccb);
|
||||||
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||||
Task ProcessCeUpdate(std::span<const u32> ccb);
|
Task ProcessCeUpdate(std::span<const u32> ccb);
|
||||||
Task ProcessCompute(std::span<const u32> acb, int vqid);
|
template <bool is_indirect = false>
|
||||||
|
Task ProcessCompute(std::span<const u32> acb, u32 vqid);
|
||||||
|
|
||||||
void Process(std::stop_token stoken);
|
void Process(std::stop_token stoken);
|
||||||
|
|
||||||
@ -1353,6 +1408,7 @@ private:
|
|||||||
VAddr indirect_args_addr{};
|
VAddr indirect_args_addr{};
|
||||||
};
|
};
|
||||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||||
|
u32 num_mapped_queues{1u}; // GFX is always available
|
||||||
|
|
||||||
struct ConstantEngine {
|
struct ConstantEngine {
|
||||||
void Reset() {
|
void Reset() {
|
||||||
@ -1381,6 +1437,7 @@ private:
|
|||||||
std::mutex submit_mutex;
|
std::mutex submit_mutex;
|
||||||
std::condition_variable_any submit_cv;
|
std::condition_variable_any submit_cv;
|
||||||
std::queue<Common::UniqueFunction<void>> command_queue{};
|
std::queue<Common::UniqueFunction<void>> command_queue{};
|
||||||
|
int curr_qid{-1};
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||||
@ -1427,6 +1484,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
|
|||||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
||||||
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
|
||||||
@ -1441,6 +1499,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
|
|||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
||||||
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
|
||||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
||||||
@ -1452,6 +1511,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
|
|||||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
|
||||||
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
|
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
|
||||||
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
|
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
|
||||||
|
|
||||||
#undef GFX6_3D_REG_INDEX
|
#undef GFX6_3D_REG_INDEX
|
||||||
|
|
||||||
|
@ -52,6 +52,10 @@ struct Buffer {
|
|||||||
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
|
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 DstSelect() const {
|
||||||
|
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
|
||||||
|
}
|
||||||
|
|
||||||
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
||||||
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
|
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
|
||||||
return static_cast<CompSwizzle>(select[comp]);
|
return static_cast<CompSwizzle>(select[comp]);
|
||||||
@ -204,6 +208,11 @@ struct Image {
|
|||||||
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
|
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
||||||
|
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
|
||||||
|
return static_cast<CompSwizzle>(select[comp]);
|
||||||
|
}
|
||||||
|
|
||||||
static char SelectComp(u32 sel) {
|
static char SelectComp(u32 sel) {
|
||||||
switch (sel) {
|
switch (sel) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <string_view>
|
||||||
|
#include <fmt/format.h>
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
@ -21,6 +23,69 @@ enum class FpDenormMode : u32 {
|
|||||||
InOutAllow = 3,
|
InOutAllow = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class TessellationType : u32 {
|
||||||
|
Isoline = 0,
|
||||||
|
Triangle = 1,
|
||||||
|
Quad = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr std::string_view NameOf(TessellationType type) {
|
||||||
|
switch (type) {
|
||||||
|
case TessellationType::Isoline:
|
||||||
|
return "Isoline";
|
||||||
|
case TessellationType::Triangle:
|
||||||
|
return "Triangle";
|
||||||
|
case TessellationType::Quad:
|
||||||
|
return "Quad";
|
||||||
|
default:
|
||||||
|
return "Unknown";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum class TessellationPartitioning : u32 {
|
||||||
|
Integer = 0,
|
||||||
|
Pow2 = 1,
|
||||||
|
FracOdd = 2,
|
||||||
|
FracEven = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
|
||||||
|
switch (partitioning) {
|
||||||
|
case TessellationPartitioning::Integer:
|
||||||
|
return "Integer";
|
||||||
|
case TessellationPartitioning::Pow2:
|
||||||
|
return "Pow2";
|
||||||
|
case TessellationPartitioning::FracOdd:
|
||||||
|
return "FracOdd";
|
||||||
|
case TessellationPartitioning::FracEven:
|
||||||
|
return "FracEven";
|
||||||
|
default:
|
||||||
|
return "Unknown";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum class TessellationTopology : u32 {
|
||||||
|
Point = 0,
|
||||||
|
Line = 1,
|
||||||
|
TriangleCw = 2,
|
||||||
|
TriangleCcw = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr std::string_view NameOf(TessellationTopology topology) {
|
||||||
|
switch (topology) {
|
||||||
|
case TessellationTopology::Point:
|
||||||
|
return "Point";
|
||||||
|
case TessellationTopology::Line:
|
||||||
|
return "Line";
|
||||||
|
case TessellationTopology::TriangleCw:
|
||||||
|
return "TriangleCw";
|
||||||
|
case TessellationTopology::TriangleCcw:
|
||||||
|
return "TriangleCcw";
|
||||||
|
default:
|
||||||
|
return "Unknown";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
||||||
enum class PrimitiveType : u32 {
|
enum class PrimitiveType : u32 {
|
||||||
None = 0,
|
None = 0,
|
||||||
@ -118,3 +183,33 @@ enum class NumberFormat : u32 {
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<AmdGpu::TessellationType> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(AmdGpu::TessellationType type, format_context& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<AmdGpu::TessellationTopology> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
@ -65,6 +65,33 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type) {
|
||||||
|
switch (type) {
|
||||||
|
case AmdGpu::PrimitiveType::TriangleList:
|
||||||
|
case AmdGpu::PrimitiveType::TriangleFan:
|
||||||
|
case AmdGpu::PrimitiveType::TriangleStrip:
|
||||||
|
case AmdGpu::PrimitiveType::PatchPrimitive:
|
||||||
|
case AmdGpu::PrimitiveType::AdjTriangleList:
|
||||||
|
case AmdGpu::PrimitiveType::AdjTriangleStrip:
|
||||||
|
case AmdGpu::PrimitiveType::QuadList:
|
||||||
|
case AmdGpu::PrimitiveType::QuadStrip:
|
||||||
|
case AmdGpu::PrimitiveType::Polygon:
|
||||||
|
return true;
|
||||||
|
case AmdGpu::PrimitiveType::None:
|
||||||
|
case AmdGpu::PrimitiveType::PointList:
|
||||||
|
case AmdGpu::PrimitiveType::LineList:
|
||||||
|
case AmdGpu::PrimitiveType::LineStrip:
|
||||||
|
case AmdGpu::PrimitiveType::AdjLineList:
|
||||||
|
case AmdGpu::PrimitiveType::AdjLineStrip:
|
||||||
|
case AmdGpu::PrimitiveType::RectList: // Screen-aligned rectangles that are not culled
|
||||||
|
case AmdGpu::PrimitiveType::LineLoop:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
|
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case AmdGpu::PrimitiveType::PointList:
|
case AmdGpu::PrimitiveType::PointList:
|
||||||
@ -672,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
|
|||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (comp_swap_reverse) {
|
|
||||||
switch (base_format) {
|
|
||||||
case vk::Format::eR8G8B8A8Unorm:
|
|
||||||
return vk::Format::eA8B8G8R8UnormPack32;
|
|
||||||
case vk::Format::eR8G8B8A8Srgb:
|
|
||||||
return vk::Format::eA8B8G8R8SrgbPack32;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return base_format;
|
return base_format;
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,8 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op);
|
|||||||
|
|
||||||
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
|
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
|
||||||
|
|
||||||
|
bool IsPrimitiveCulled(AmdGpu::PrimitiveType type);
|
||||||
|
|
||||||
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
|
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
|
||||||
|
|
||||||
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
|
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
|
||||||
|
@ -3,10 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER
|
|
||||||
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Include vulkan-hpp header
|
// Include vulkan-hpp header
|
||||||
#define VK_ENABLE_BETA_EXTENSIONS
|
#define VK_ENABLE_BETA_EXTENSIONS
|
||||||
#define VK_NO_PROTOTYPES
|
#define VK_NO_PROTOTYPES
|
||||||
|
@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||||||
ComputePipelineKey compute_key_, const Shader::Info& info_,
|
ComputePipelineKey compute_key_, const Shader::Info& info_,
|
||||||
vk::ShaderModule module)
|
vk::ShaderModule module)
|
||||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
||||||
auto& info = stages[int(Shader::Stage::Compute)];
|
auto& info = stages[int(Shader::LogicalStage::Compute)];
|
||||||
info = &info_;
|
info = &info_;
|
||||||
|
|
||||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||||
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
|
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||||
const auto& vs_info = GetStage(Shader::Stage::Vertex);
|
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||||
for (const auto& attrib : fetch_shader->attributes) {
|
for (const auto& attrib : fetch_shader->attributes) {
|
||||||
if (attrib.UsesStepRates()) {
|
if (attrib.UsesStepRates()) {
|
||||||
// Skip attribute binding as the data will be pulled by shader
|
// Skip attribute binding as the data will be pulled by shader
|
||||||
@ -106,11 +107,17 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
key.primitive_restart_index == 0xFFFFFFFF,
|
key.primitive_restart_index == 0xFFFFFFFF,
|
||||||
"Primitive restart index other than -1 is not supported yet");
|
"Primitive restart index other than -1 is not supported yet");
|
||||||
|
|
||||||
|
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
|
||||||
|
.patchControlPoints = key.patch_control_points,
|
||||||
|
};
|
||||||
|
|
||||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||||
.depthClampEnable = false,
|
.depthClampEnable = false,
|
||||||
.rasterizerDiscardEnable = false,
|
.rasterizerDiscardEnable = false,
|
||||||
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
|
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
|
||||||
.cullMode = LiverpoolToVK::CullMode(key.cull_mode),
|
.cullMode = LiverpoolToVK::IsPrimitiveCulled(key.prim_type)
|
||||||
|
? LiverpoolToVK::CullMode(key.cull_mode)
|
||||||
|
: vk::CullModeFlagBits::eNone,
|
||||||
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
|
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
|
||||||
? vk::FrontFace::eClockwise
|
? vk::FrontFace::eClockwise
|
||||||
: vk::FrontFace::eCounterClockwise,
|
: vk::FrontFace::eCounterClockwise,
|
||||||
@ -202,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
|
|
||||||
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
|
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
|
||||||
shader_stages;
|
shader_stages;
|
||||||
auto stage = u32(Shader::Stage::Vertex);
|
auto stage = u32(Shader::LogicalStage::Vertex);
|
||||||
if (infos[stage]) {
|
if (infos[stage]) {
|
||||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||||
.stage = vk::ShaderStageFlagBits::eVertex,
|
.stage = vk::ShaderStageFlagBits::eVertex,
|
||||||
@ -210,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
.pName = "main",
|
.pName = "main",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
stage = u32(Shader::Stage::Geometry);
|
stage = u32(Shader::LogicalStage::Geometry);
|
||||||
if (infos[stage]) {
|
if (infos[stage]) {
|
||||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||||
.stage = vk::ShaderStageFlagBits::eGeometry,
|
.stage = vk::ShaderStageFlagBits::eGeometry,
|
||||||
@ -218,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
.pName = "main",
|
.pName = "main",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
stage = u32(Shader::Stage::Fragment);
|
stage = u32(Shader::LogicalStage::TessellationControl);
|
||||||
|
if (infos[stage]) {
|
||||||
|
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||||
|
.stage = vk::ShaderStageFlagBits::eTessellationControl,
|
||||||
|
.module = modules[stage],
|
||||||
|
.pName = "main",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
stage = u32(Shader::LogicalStage::TessellationEval);
|
||||||
|
if (infos[stage]) {
|
||||||
|
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||||
|
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
|
||||||
|
.module = modules[stage],
|
||||||
|
.pName = "main",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
stage = u32(Shader::LogicalStage::Fragment);
|
||||||
if (infos[stage]) {
|
if (infos[stage]) {
|
||||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||||
.stage = vk::ShaderStageFlagBits::eFragment,
|
.stage = vk::ShaderStageFlagBits::eFragment,
|
||||||
@ -227,17 +250,15 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
|
|
||||||
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
|
|
||||||
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
|
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
|
||||||
.colorAttachmentCount = num_color_formats,
|
.colorAttachmentCount = key.num_color_attachments,
|
||||||
.pColorAttachmentFormats = key.color_formats.data(),
|
.pColorAttachmentFormats = key.color_formats.data(),
|
||||||
.depthAttachmentFormat = key.depth_format,
|
.depthAttachmentFormat = key.depth_format,
|
||||||
.stencilAttachmentFormat = key.stencil_format,
|
.stencilAttachmentFormat = key.stencil_format,
|
||||||
};
|
};
|
||||||
|
|
||||||
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
|
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
|
||||||
for (u32 i = 0; i < num_color_formats; i++) {
|
for (u32 i = 0; i < key.num_color_attachments; i++) {
|
||||||
const auto& control = key.blend_controls[i];
|
const auto& control = key.blend_controls[i];
|
||||||
const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor);
|
const auto src_color = LiverpoolToVK::BlendFactor(control.color_src_factor);
|
||||||
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
|
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
|
||||||
@ -290,7 +311,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||||
.logicOpEnable = false,
|
.logicOpEnable = false,
|
||||||
.logicOp = vk::LogicOp::eCopy,
|
.logicOp = vk::LogicOp::eCopy,
|
||||||
.attachmentCount = num_color_formats,
|
.attachmentCount = key.num_color_attachments,
|
||||||
.pAttachments = attachments.data(),
|
.pAttachments = attachments.data(),
|
||||||
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
||||||
};
|
};
|
||||||
@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
.pStages = shader_stages.data(),
|
.pStages = shader_stages.data(),
|
||||||
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
|
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
|
||||||
.pInputAssemblyState = &input_assembly,
|
.pInputAssemblyState = &input_assembly,
|
||||||
|
.pTessellationState =
|
||||||
|
stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
|
||||||
.pViewportState = &viewport_info,
|
.pViewportState = &viewport_info,
|
||||||
.pRasterizationState = &raster_state,
|
.pRasterizationState = &raster_state,
|
||||||
.pMultisampleState = &multisampling,
|
.pMultisampleState = &multisampling,
|
||||||
@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||||||
if (!stage) {
|
if (!stage) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stage->has_readconst) {
|
if (stage->has_readconst) {
|
||||||
bindings.push_back({
|
bindings.push_back({
|
||||||
.binding = binding++,
|
.binding = binding++,
|
||||||
|
@ -29,6 +29,7 @@ using Liverpool = AmdGpu::Liverpool;
|
|||||||
|
|
||||||
struct GraphicsPipelineKey {
|
struct GraphicsPipelineKey {
|
||||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||||
|
u32 num_color_attachments;
|
||||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||||
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
|
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
|
||||||
@ -51,6 +52,7 @@ struct GraphicsPipelineKey {
|
|||||||
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||||
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
||||||
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
|
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
|
||||||
|
u32 patch_control_points;
|
||||||
|
|
||||||
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
||||||
return std::memcmp(this, &key, sizeof(key)) == 0;
|
return std::memcmp(this, &key, sizeof(key)) == 0;
|
||||||
@ -72,7 +74,7 @@ public:
|
|||||||
|
|
||||||
bool IsEmbeddedVs() const noexcept {
|
bool IsEmbeddedVs() const noexcept {
|
||||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||||
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
|
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto GetWriteMasks() const {
|
auto GetWriteMasks() const {
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "sdl_window.h"
|
#include "sdl_window.h"
|
||||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
@ -68,11 +69,10 @@ std::unordered_map<vk::Format, vk::FormatProperties3> GetFormatProperties(
|
|||||||
}
|
}
|
||||||
// Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility
|
// Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility
|
||||||
static constexpr std::array misc_formats = {
|
static constexpr std::array misc_formats = {
|
||||||
vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32,
|
vk::Format::eA2R10G10B10UnormPack32,
|
||||||
vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm,
|
vk::Format::eB8G8R8A8Unorm,
|
||||||
vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint,
|
vk::Format::eB8G8R8A8Srgb,
|
||||||
vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb,
|
vk::Format::eD24UnormS8Uint,
|
||||||
vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint,
|
|
||||||
};
|
};
|
||||||
for (const auto& format : misc_formats) {
|
for (const auto& format : misc_formats) {
|
||||||
if (!format_properties.contains(format)) {
|
if (!format_properties.contains(format)) {
|
||||||
@ -262,11 +262,13 @@ bool Instance::CreateDevice() {
|
|||||||
// The next two extensions are required to be available together in order to support write masks
|
// The next two extensions are required to be available together in order to support write masks
|
||||||
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
||||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||||
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
|
const bool calibrated_timestamps =
|
||||||
|
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
|
||||||
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||||
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||||
|
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||||
|
|
||||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||||
// with extensions.
|
// with extensions.
|
||||||
@ -327,6 +329,7 @@ bool Instance::CreateDevice() {
|
|||||||
.imageCubeArray = features.imageCubeArray,
|
.imageCubeArray = features.imageCubeArray,
|
||||||
.independentBlend = features.independentBlend,
|
.independentBlend = features.independentBlend,
|
||||||
.geometryShader = features.geometryShader,
|
.geometryShader = features.geometryShader,
|
||||||
|
.tessellationShader = features.tessellationShader,
|
||||||
.logicOp = features.logicOp,
|
.logicOp = features.logicOp,
|
||||||
.depthBiasClamp = features.depthBiasClamp,
|
.depthBiasClamp = features.depthBiasClamp,
|
||||||
.fillModeNonSolid = features.fillModeNonSolid,
|
.fillModeNonSolid = features.fillModeNonSolid,
|
||||||
@ -580,42 +583,22 @@ bool Instance::IsFormatSupported(const vk::Format format,
|
|||||||
return (GetFormatFeatureFlags(format) & flags) == flags;
|
return (GetFormatFeatureFlags(format) & flags) == flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk::Format GetAlternativeFormat(const vk::Format format) {
|
|
||||||
switch (format) {
|
|
||||||
case vk::Format::eB5G6R5UnormPack16:
|
|
||||||
return vk::Format::eR5G6B5UnormPack16;
|
|
||||||
case vk::Format::eD16UnormS8Uint:
|
|
||||||
return vk::Format::eD24UnormS8Uint;
|
|
||||||
default:
|
|
||||||
return format;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::Format Instance::GetSupportedFormat(const vk::Format format,
|
vk::Format Instance::GetSupportedFormat(const vk::Format format,
|
||||||
const vk::FormatFeatureFlags2 flags) const {
|
const vk::FormatFeatureFlags2 flags) const {
|
||||||
if (IsFormatSupported(format, flags)) [[likely]] {
|
if (!IsFormatSupported(format, flags)) [[unlikely]] {
|
||||||
return format;
|
switch (format) {
|
||||||
|
case vk::Format::eD16UnormS8Uint:
|
||||||
|
if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) {
|
||||||
|
return vk::Format::eD24UnormS8Uint;
|
||||||
|
}
|
||||||
|
if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) {
|
||||||
|
return vk::Format::eD32SfloatS8Uint;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
const vk::Format alternative = GetAlternativeFormat(format);
|
|
||||||
if (IsFormatSupported(alternative, flags)) [[likely]] {
|
|
||||||
return alternative;
|
|
||||||
}
|
}
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::ComponentMapping Instance::GetSupportedComponentSwizzle(
|
|
||||||
const vk::Format format, const vk::ComponentMapping swizzle,
|
|
||||||
const vk::FormatFeatureFlags2 flags) const {
|
|
||||||
if (IsFormatSupported(format, flags)) [[likely]] {
|
|
||||||
return swizzle;
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::ComponentMapping supported_swizzle = swizzle;
|
|
||||||
if (format == vk::Format::eB5G6R5UnormPack16) {
|
|
||||||
// B5G6R5 -> R5G6B5
|
|
||||||
std::swap(supported_swizzle.r, supported_swizzle.b);
|
|
||||||
}
|
|
||||||
return supported_swizzle;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@ -33,10 +33,6 @@ public:
|
|||||||
[[nodiscard]] vk::Format GetSupportedFormat(vk::Format format,
|
[[nodiscard]] vk::Format GetSupportedFormat(vk::Format format,
|
||||||
vk::FormatFeatureFlags2 flags) const;
|
vk::FormatFeatureFlags2 flags) const;
|
||||||
|
|
||||||
/// Re-orders a component swizzle for format compatibility, if needed.
|
|
||||||
[[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle(
|
|
||||||
vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const;
|
|
||||||
|
|
||||||
/// Returns the Vulkan instance
|
/// Returns the Vulkan instance
|
||||||
vk::Instance GetInstance() const {
|
vk::Instance GetInstance() const {
|
||||||
return *instance;
|
return *instance;
|
||||||
@ -158,6 +154,11 @@ public:
|
|||||||
return legacy_vertex_attributes;
|
return legacy_vertex_attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
||||||
|
bool IsImageLoadStoreLodSupported() const {
|
||||||
|
return image_load_store_lod;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when geometry shaders are supported by the device
|
/// Returns true when geometry shaders are supported by the device
|
||||||
bool IsGeometryStageSupported() const {
|
bool IsGeometryStageSupported() const {
|
||||||
return features.geometryShader;
|
return features.geometryShader;
|
||||||
@ -327,6 +328,7 @@ private:
|
|||||||
bool maintenance5{};
|
bool maintenance5{};
|
||||||
bool list_restart{};
|
bool list_restart{};
|
||||||
bool legacy_vertex_attributes{};
|
bool legacy_vertex_attributes{};
|
||||||
|
bool image_load_store_lod{};
|
||||||
u64 min_imported_host_pointer_alignment{};
|
u64 min_imported_host_pointer_alignment{};
|
||||||
u32 subgroup_size{};
|
u32 subgroup_size{};
|
||||||
bool tooling_info{};
|
bool tooling_info{};
|
||||||
|
@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
|
|||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
using Shader::LogicalStage;
|
||||||
|
using Shader::Stage;
|
||||||
using Shader::VsOutput;
|
using Shader::VsOutput;
|
||||||
|
|
||||||
constexpr static std::array DescriptorHeapSizes = {
|
constexpr static std::array DescriptorHeapSizes = {
|
||||||
@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
|||||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||||
auto info = Shader::RuntimeInfo{stage};
|
auto info = Shader::RuntimeInfo{stage};
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
const auto BuildCommon = [&](const auto& program) {
|
const auto BuildCommon = [&](const auto& program) {
|
||||||
@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||||||
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
||||||
};
|
};
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Shader::Stage::Export: {
|
case Stage::Local: {
|
||||||
|
BuildCommon(regs.ls_program);
|
||||||
|
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
|
||||||
|
info.ls_info.links_with_tcs = true;
|
||||||
|
Shader::TessellationDataConstantBuffer tess_constants;
|
||||||
|
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
|
||||||
|
const auto params = Liverpool::GetParams(*pgm);
|
||||||
|
const auto& hull_info = program_cache.at(params.hash)->info;
|
||||||
|
hull_info.ReadTessConstantBuffer(tess_constants);
|
||||||
|
info.ls_info.ls_stride = tess_constants.ls_stride;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Stage::Hull: {
|
||||||
|
BuildCommon(regs.hs_program);
|
||||||
|
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
||||||
|
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
|
||||||
|
info.hs_info.tess_type = regs.tess_config.type;
|
||||||
|
|
||||||
|
// We need to initialize most hs_info fields after finding the V# with tess constants
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Stage::Export: {
|
||||||
BuildCommon(regs.es_program);
|
BuildCommon(regs.es_program);
|
||||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Vertex: {
|
case Stage::Vertex: {
|
||||||
BuildCommon(regs.vs_program);
|
BuildCommon(regs.vs_program);
|
||||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||||
info.vs_info.emulate_depth_negative_one_to_one =
|
info.vs_info.emulate_depth_negative_one_to_one =
|
||||||
!instance.IsDepthClipControlSupported() &&
|
!instance.IsDepthClipControlSupported() &&
|
||||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||||
|
if (l_stage == LogicalStage::TessellationEval) {
|
||||||
|
info.vs_info.tess_type = regs.tess_config.type;
|
||||||
|
info.vs_info.tess_topology = regs.tess_config.topology;
|
||||||
|
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Geometry: {
|
case Stage::Geometry: {
|
||||||
BuildCommon(regs.gs_program);
|
BuildCommon(regs.gs_program);
|
||||||
auto& gs_info = info.gs_info;
|
auto& gs_info = info.gs_info;
|
||||||
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||||
@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||||||
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
|
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Fragment: {
|
case Stage::Fragment: {
|
||||||
BuildCommon(regs.ps_program);
|
BuildCommon(regs.ps_program);
|
||||||
info.fs_info.en_flags = regs.ps_input_ena;
|
info.fs_info.en_flags = regs.ps_input_ena;
|
||||||
info.fs_info.addr_flags = regs.ps_input_addr;
|
info.fs_info.addr_flags = regs.ps_input_addr;
|
||||||
@ -143,10 +172,10 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Compute: {
|
case Stage::Compute: {
|
||||||
const auto& cs_pgm = regs.cs_program;
|
const auto& cs_pgm = liverpool->GetCsRegs();
|
||||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||||
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
|
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;
|
||||||
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||||
cs_pgm.num_thread_z.full};
|
cs_pgm.num_thread_z.full};
|
||||||
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||||
@ -172,8 +201,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||||
.support_explicit_workgroup_layout = true,
|
.support_explicit_workgroup_layout = true,
|
||||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||||
|
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
||||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||||
|
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||||
};
|
};
|
||||||
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
||||||
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
||||||
@ -268,6 +299,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
|
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
|
||||||
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
|
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
|
||||||
// order. We need to do some arrays compaction at this stage
|
// order. We need to do some arrays compaction at this stage
|
||||||
|
key.num_color_attachments = 0;
|
||||||
key.color_formats.fill(vk::Format::eUndefined);
|
key.color_formats.fill(vk::Format::eUndefined);
|
||||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||||
key.blend_controls.fill({});
|
key.blend_controls.fill({});
|
||||||
@ -275,13 +307,26 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
||||||
key.vertex_buffer_formats.fill(vk::Format::eUndefined);
|
key.vertex_buffer_formats.fill(vk::Format::eUndefined);
|
||||||
|
|
||||||
|
key.patch_control_points = 0;
|
||||||
|
if (regs.stage_enable.hs_en.Value()) {
|
||||||
|
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
||||||
|
}
|
||||||
|
|
||||||
// First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader
|
// First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader
|
||||||
// recompiler.
|
// recompiler.
|
||||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||||
auto const& col_buf = regs.color_buffers[cb];
|
auto const& col_buf = regs.color_buffers[cb];
|
||||||
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
|
if (skip_cb_binding || !col_buf) {
|
||||||
|
// No attachment bound and no incremented index.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto remapped_cb = key.num_color_attachments++;
|
||||||
|
if (!regs.color_target_mask.GetMask(cb)) {
|
||||||
|
// Bound to null handle, skip over this attachment index.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto base_format =
|
const auto base_format =
|
||||||
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
|
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
|
||||||
key.color_formats[remapped_cb] =
|
key.color_formats[remapped_cb] =
|
||||||
@ -290,14 +335,12 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
if (base_format == key.color_formats[remapped_cb]) {
|
if (base_format == key.color_formats[remapped_cb]) {
|
||||||
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
++remapped_cb;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fetch_shader = std::nullopt;
|
fetch_shader = std::nullopt;
|
||||||
|
|
||||||
Shader::Backend::Bindings binding{};
|
Shader::Backend::Bindings binding{};
|
||||||
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
|
const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
|
||||||
const auto stage_in_idx = static_cast<u32>(stage_in);
|
const auto stage_in_idx = static_cast<u32>(stage_in);
|
||||||
const auto stage_out_idx = static_cast<u32>(stage_out);
|
const auto stage_out_idx = static_cast<u32>(stage_out);
|
||||||
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
|
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
|
||||||
@ -324,23 +367,23 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
auto params = Liverpool::GetParams(*pgm);
|
auto params = Liverpool::GetParams(*pgm);
|
||||||
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
|
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
|
||||||
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
|
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
|
||||||
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
|
key.stage_hashes[stage_out_idx]) =
|
||||||
|
GetProgram(stage_in, stage_out, params, binding);
|
||||||
if (fetch_shader_) {
|
if (fetch_shader_) {
|
||||||
fetch_shader = fetch_shader_;
|
fetch_shader = fetch_shader_;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
|
|
||||||
|
|
||||||
const auto& IsGsFeaturesSupported = [&]() -> bool {
|
const auto& IsGsFeaturesSupported = [&]() -> bool {
|
||||||
// These checks are temporary until all functionality is implemented.
|
// These checks are temporary until all functionality is implemented.
|
||||||
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
|
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
|
||||||
};
|
};
|
||||||
|
|
||||||
TryBindStage(Shader::Stage::Fragment);
|
infos.fill(nullptr);
|
||||||
|
TryBindStage(Stage::Fragment, LogicalStage::Fragment);
|
||||||
|
|
||||||
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
|
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
|
||||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||||
|
|
||||||
switch (regs.stage_enable.raw) {
|
switch (regs.stage_enable.raw) {
|
||||||
@ -348,22 +391,36 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
|
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
|
if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!TryBindStage(Shader::Stage::Geometry)) {
|
if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
|
||||||
|
if (!instance.IsTessellationSupported()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
TryBindStage(Shader::Stage::Vertex);
|
TryBindStage(Stage::Vertex, LogicalStage::Vertex);
|
||||||
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
|
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||||
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
|
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||||
u32 vertex_binding = 0;
|
u32 vertex_binding = 0;
|
||||||
for (const auto& attrib : fetch_shader->attributes) {
|
for (const auto& attrib : fetch_shader->attributes) {
|
||||||
@ -385,10 +442,18 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
// Second pass to fill remain CB pipeline key data
|
// Second pass to fill remain CB pipeline key data
|
||||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||||
auto const& col_buf = regs.color_buffers[cb];
|
auto const& col_buf = regs.color_buffers[cb];
|
||||||
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) ||
|
if (skip_cb_binding || !col_buf) {
|
||||||
(key.mrt_mask & (1u << cb)) == 0) {
|
// No attachment bound and no incremented index.
|
||||||
key.color_formats[cb] = vk::Format::eUndefined;
|
continue;
|
||||||
key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard;
|
}
|
||||||
|
|
||||||
|
if (!regs.color_target_mask.GetMask(cb) || (key.mrt_mask & (1u << cb)) == 0) {
|
||||||
|
// Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
|
||||||
|
// of the latter we need to change format to undefined, and either way we need to
|
||||||
|
// increment the index for the null attachment binding.
|
||||||
|
key.color_formats[remapped_cb] = vk::Format::eUndefined;
|
||||||
|
key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard;
|
||||||
|
++remapped_cb;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,10 +462,9 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
!col_buf.info.blend_bypass);
|
!col_buf.info.blend_bypass);
|
||||||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
||||||
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
||||||
|
++remapped_cb;
|
||||||
|
|
||||||
num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2);
|
num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2);
|
||||||
|
|
||||||
++remapped_cb;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// It seems that the number of samples > 1 set in the AA config doesn't mean we're always
|
// It seems that the number of samples > 1 set in the AA config doesn't mean we're always
|
||||||
@ -409,19 +473,18 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
key.num_samples = num_samples;
|
key.num_samples = num_samples;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
} // namespace Vulkan
|
||||||
|
|
||||||
bool PipelineCache::RefreshComputeKey() {
|
bool PipelineCache::RefreshComputeKey() {
|
||||||
Shader::Backend::Bindings binding{};
|
Shader::Backend::Bindings binding{};
|
||||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
const auto& cs_pgm = liverpool->GetCsRegs();
|
||||||
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
const auto cs_params = Liverpool::GetParams(cs_pgm);
|
||||||
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
|
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
|
||||||
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
|
||||||
const Shader::RuntimeInfo& runtime_info,
|
|
||||||
std::span<const u32> code, size_t perm_idx,
|
std::span<const u32> code, size_t perm_idx,
|
||||||
Shader::Backend::Bindings& binding) {
|
Shader::Backend::Bindings& binding) {
|
||||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||||
@ -446,19 +509,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
|||||||
const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx);
|
const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||||
if (Config::collectShadersForDebug()) {
|
if (Config::collectShadersForDebug()) {
|
||||||
DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span<const u32>{},
|
DebugState.CollectShader(name, info.l_stage, module, spv, code,
|
||||||
is_patched);
|
patch ? *patch : std::span<const u32>{}, is_patched);
|
||||||
}
|
}
|
||||||
return module;
|
return module;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
|
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
|
||||||
PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
Shader::ShaderParams params,
|
||||||
Shader::Backend::Bindings& binding) {
|
Shader::Backend::Bindings& binding) {
|
||||||
const auto runtime_info = BuildRuntimeInfo(stage);
|
auto runtime_info = BuildRuntimeInfo(stage, l_stage);
|
||||||
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
||||||
if (new_program) {
|
if (new_program) {
|
||||||
it_pgm.value() = std::make_unique<Program>(stage, params);
|
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
|
||||||
auto& program = it_pgm.value();
|
auto& program = it_pgm.value();
|
||||||
auto start = binding;
|
auto start = binding;
|
||||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||||
@ -467,6 +530,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
|||||||
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
|
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
|
||||||
HashCombine(params.hash, 0));
|
HashCombine(params.hash, 0));
|
||||||
}
|
}
|
||||||
|
it_pgm.value()->info.user_data = params.user_data;
|
||||||
|
|
||||||
auto& program = it_pgm.value();
|
auto& program = it_pgm.value();
|
||||||
auto& info = program->info;
|
auto& info = program->info;
|
||||||
@ -477,7 +541,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
|||||||
|
|
||||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||||
if (it == program->modules.end()) {
|
if (it == program->modules.end()) {
|
||||||
auto new_info = Shader::Info(stage, params);
|
auto new_info = Shader::Info(stage, l_stage, params);
|
||||||
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
||||||
program->AddPermut(module, std::move(spec));
|
program->AddPermut(module, std::move(spec));
|
||||||
} else {
|
} else {
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user