mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
Merge remote-tracking branch 'origin/main'
This commit is contained in:
commit
1f4cbe705a
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
@ -174,11 +174,6 @@ jobs:
|
||||
with:
|
||||
xcode-version: latest
|
||||
|
||||
- name: Install MoltenVK
|
||||
run: |
|
||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
arch -x86_64 /usr/local/bin/brew install molten-vk
|
||||
|
||||
- name: Cache CMake Configuration
|
||||
uses: actions/cache@v4
|
||||
env:
|
||||
@ -210,7 +205,7 @@ jobs:
|
||||
run: |
|
||||
mkdir upload
|
||||
mv ${{github.workspace}}/build/shadps4 upload
|
||||
cp $(arch -x86_64 /usr/local/bin/brew --prefix)/opt/molten-vk/lib/libMoltenVK.dylib upload
|
||||
cp ${{github.workspace}}/build/externals/MoltenVK/libMoltenVK.dylib upload
|
||||
tar cf shadps4-macos-sdl.tar.gz -C upload .
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@ -230,11 +225,8 @@ jobs:
|
||||
with:
|
||||
xcode-version: latest
|
||||
|
||||
- name: Install MoltenVK and Setup Qt
|
||||
run: |
|
||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
arch -x86_64 /usr/local/bin/brew install molten-vk
|
||||
- uses: jurplel/install-qt-action@v4
|
||||
- name: Setup Qt
|
||||
uses: jurplel/install-qt-action@v4
|
||||
with:
|
||||
version: 6.7.3
|
||||
host: mac
|
||||
|
14
.gitmodules
vendored
14
.gitmodules
vendored
@ -106,4 +106,16 @@
|
||||
[submodule "externals/libpng"]
|
||||
path = externals/libpng
|
||||
url = https://github.com/pnggroup/libpng
|
||||
shallow = true
|
||||
shallow = true
|
||||
[submodule "externals/MoltenVK/SPIRV-Cross"]
|
||||
path = externals/MoltenVK/SPIRV-Cross
|
||||
url = https://github.com/KhronosGroup/SPIRV-Cross
|
||||
shallow = true
|
||||
[submodule "externals/MoltenVK/MoltenVK"]
|
||||
path = externals/MoltenVK/MoltenVK
|
||||
url = https://github.com/KhronosGroup/MoltenVK
|
||||
shallow = true
|
||||
[submodule "externals/MoltenVK/cereal"]
|
||||
path = externals/MoltenVK/cereal
|
||||
url = https://github.com/USCiLab/cereal
|
||||
shallow = true
|
||||
|
@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
||||
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
|
||||
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
||||
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
|
||||
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
|
||||
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ir_passes.h
|
||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
||||
@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
||||
src/shader_recompiler/ir/opcodes.cpp
|
||||
src/shader_recompiler/ir/opcodes.h
|
||||
src/shader_recompiler/ir/opcodes.inc
|
||||
src/shader_recompiler/ir/patch.cpp
|
||||
src/shader_recompiler/ir/patch.h
|
||||
src/shader_recompiler/ir/post_order.cpp
|
||||
src/shader_recompiler/ir/post_order.h
|
||||
src/shader_recompiler/ir/program.cpp
|
||||
@ -881,7 +884,7 @@ endif()
|
||||
create_target_directory_groups(shadps4)
|
||||
|
||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
|
||||
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
|
||||
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers)
|
||||
|
||||
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
|
||||
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
|
||||
@ -895,13 +898,17 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF)
|
||||
if (USE_SYSTEM_VULKAN_LOADER)
|
||||
target_compile_definitions(shadps4 PRIVATE USE_SYSTEM_VULKAN_LOADER=1)
|
||||
if (ENABLE_QT_GUI)
|
||||
# Include MoltenVK in the app bundle, along with an ICD file so it can be found by the system Vulkan loader if used for loading layers.
|
||||
target_sources(shadps4 PRIVATE externals/MoltenVK/MoltenVK_icd.json)
|
||||
set_source_files_properties(externals/MoltenVK/MoltenVK_icd.json
|
||||
PROPERTIES MACOSX_PACKAGE_LOCATION Resources/vulkan/icd.d)
|
||||
add_custom_command(TARGET shadps4 POST_BUILD
|
||||
COMMAND cmake -E copy $<TARGET_LINKER_FILE:MoltenVK> $<TARGET_BUNDLE_DIR:shadps4>/Contents/Frameworks/libMoltenVK.dylib)
|
||||
set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks")
|
||||
else()
|
||||
# Link MoltenVK for Vulkan support
|
||||
find_library(MOLTENVK MoltenVK REQUIRED)
|
||||
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
|
||||
# For non-bundled SDL build, just do a normal library link.
|
||||
target_link_libraries(shadps4 PRIVATE MoltenVK)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE STREQUAL "x86_64")
|
||||
@ -1022,4 +1029,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo")
|
||||
install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png")
|
||||
install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -76,6 +76,13 @@ For more information on how to test, debug and report issues with the emulator o
|
||||
|
||||
# Keyboard and Mouse Mappings
|
||||
|
||||
| Button | Function |
|
||||
|-------------|-------------|
|
||||
F10 | FPS Counter
|
||||
Ctrl+F10 | Video Debug Info
|
||||
F11 | Fullscreen
|
||||
F12 | Trigger RenderDoc Capture
|
||||
|
||||
> [!NOTE]
|
||||
> Xbox and DualShock controllers work out of the box.
|
||||
|
||||
|
@ -15,6 +15,7 @@ path = [
|
||||
"documents/changelog.md",
|
||||
"documents/Quickstart/2.png",
|
||||
"documents/Screenshots/*",
|
||||
"externals/MoltenVK/MoltenVK_icd.json",
|
||||
"scripts/ps4_names.txt",
|
||||
"src/images/about_icon.png",
|
||||
"src/images/controller_icon.png",
|
||||
|
@ -24,23 +24,21 @@ eval $(/opt/homebrew/bin/brew shellenv)
|
||||
brew install clang-format cmake
|
||||
```
|
||||
|
||||
Next, install x86_64 Homebrew and libraries.
|
||||
Next, install x86_64 Qt. You can skip these steps and move on to **Cloning and compiling** if you do not intend to build the Qt GUI.
|
||||
|
||||
**If you are on an ARM Mac:**
|
||||
```
|
||||
# Installs x86_64 Homebrew to /usr/local
|
||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
# Installs libraries.
|
||||
arch -x86_64 /usr/local/bin/brew install molten-vk qt@6
|
||||
arch -x86_64 /usr/local/bin/brew install qt@6
|
||||
```
|
||||
|
||||
**If you are on an x86_64 Mac:**
|
||||
```
|
||||
brew install molten-vk qt@6
|
||||
brew install qt@6
|
||||
```
|
||||
|
||||
If you don't need the Qt GUI you can remove `qt@6` from the last command.
|
||||
|
||||
### Cloning and compiling:
|
||||
|
||||
Clone the repository recursively:
|
||||
|
28
externals/CMakeLists.txt
vendored
28
externals/CMakeLists.txt
vendored
@ -110,7 +110,7 @@ if (NOT TARGET glslang::glslang)
|
||||
set(ENABLE_OPT OFF CACHE BOOL "")
|
||||
add_subdirectory(glslang)
|
||||
file(COPY glslang/SPIRV DESTINATION glslang/glslang FILES_MATCHING PATTERN "*.h")
|
||||
target_include_directories(SPIRV INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
|
||||
target_include_directories(glslang INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/glslang")
|
||||
endif()
|
||||
|
||||
# Robin-map
|
||||
@ -177,15 +177,6 @@ if (NOT TARGET PNG::PNG)
|
||||
add_library(PNG::PNG ALIAS png_static)
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
# date
|
||||
if (NOT TARGET date::date-tz)
|
||||
option(BUILD_TZ_LIB "" ON)
|
||||
option(USE_SYSTEM_TZ_DB "" ON)
|
||||
add_subdirectory(date)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Dear ImGui
|
||||
add_library(Dear_ImGui
|
||||
dear_imgui/imgui.cpp
|
||||
@ -202,7 +193,7 @@ option(TRACY_ENABLE "" ON)
|
||||
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash
|
||||
option(TRACY_ON_DEMAND "" ON)
|
||||
option(TRACY_NO_FRAME_IMAGE "" ON)
|
||||
option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling
|
||||
option(TRACY_FIBERS "" OFF) # For AmdGpu frontend profiling, disabled due to instability
|
||||
option(TRACY_NO_SYSTEM_TRACING "" ON)
|
||||
option(TRACY_NO_CALLSTACK "" ON)
|
||||
option(TRACY_NO_CODE_TRANSFER "" ON)
|
||||
@ -232,3 +223,18 @@ if (NOT TARGET stb::headers)
|
||||
target_include_directories(stb INTERFACE stb)
|
||||
add_library(stb::headers ALIAS stb)
|
||||
endif()
|
||||
|
||||
# Apple-only dependencies
|
||||
if (APPLE)
|
||||
# date
|
||||
if (NOT TARGET date::date-tz)
|
||||
option(BUILD_TZ_LIB "" ON)
|
||||
option(USE_SYSTEM_TZ_DB "" ON)
|
||||
add_subdirectory(date)
|
||||
endif()
|
||||
|
||||
# MoltenVK
|
||||
if (NOT TARGET MoltenVK)
|
||||
add_subdirectory(MoltenVK)
|
||||
endif()
|
||||
endif()
|
||||
|
93
externals/MoltenVK/CMakeLists.txt
vendored
Normal file
93
externals/MoltenVK/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# Prepare MoltenVK Git revision
|
||||
find_package(Git)
|
||||
if(GIT_FOUND)
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
||||
OUTPUT_VARIABLE MVK_GIT_REV
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
endif()
|
||||
set(MVK_GENERATED_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/Generated)
|
||||
file(WRITE ${MVK_GENERATED_INCLUDES}/mvkGitRevDerived.h "static const char* mvkRevString = \"${MVK_GIT_REV}\";")
|
||||
message(STATUS "MoltenVK revision: ${MVK_GIT_REV}")
|
||||
|
||||
# Prepare MoltenVK version
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK/MoltenVK/API/mvk_private_api.h MVK_PRIVATE_API)
|
||||
string(REGEX MATCH "#define MVK_VERSION_MAJOR [0-9]+" MVK_VERSION_MAJOR_LINE "${MVK_PRIVATE_API}")
|
||||
string(REGEX MATCH "[0-9]+" MVK_VERSION_MAJOR "${MVK_VERSION_MAJOR_LINE}")
|
||||
string(REGEX MATCH "#define MVK_VERSION_MINOR [0-9]+" MVK_VERSION_MINOR_LINE "${MVK_PRIVATE_API}")
|
||||
string(REGEX MATCH "[0-9]+" MVK_VERSION_MINOR "${MVK_VERSION_MINOR_LINE}")
|
||||
string(REGEX MATCH "#define MVK_VERSION_PATCH [0-9]+" MVK_VERSION_PATCH_LINE "${MVK_PRIVATE_API}")
|
||||
string(REGEX MATCH "[0-9]+" MVK_VERSION_PATCH "${MVK_VERSION_PATCH_LINE}")
|
||||
set(MVK_VERSION "${MVK_VERSION_MAJOR}.${MVK_VERSION_MINOR}.${MVK_VERSION_PATCH}")
|
||||
message(STATUS "MoltenVK version: ${MVK_VERSION}")
|
||||
|
||||
# Find required system libraries
|
||||
find_library(APPKIT_LIBRARY AppKit REQUIRED)
|
||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||
find_library(IOKIT_LIBRARY IOKit REQUIRED)
|
||||
find_library(IOSURFACE_LIBRARY IOSurface REQUIRED)
|
||||
find_library(METAL_LIBRARY Metal REQUIRED)
|
||||
find_library(QUARTZCORE_LIBRARY QuartzCore REQUIRED)
|
||||
|
||||
# cereal
|
||||
option(SKIP_PORTABILITY_TEST "" ON)
|
||||
option(BUILD_DOC "" OFF)
|
||||
option(BUILD_SANDBOX "" OFF)
|
||||
option(SKIP_PERFORMANCE_COMPARISON "" ON)
|
||||
option(SPIRV_CROSS_SKIP_INSTALL "" ON)
|
||||
add_subdirectory(cereal)
|
||||
|
||||
# SPIRV-Cross
|
||||
option(SPIRV_CROSS_CLI "" OFF)
|
||||
option(SPIRV_CROSS_ENABLE_TESTS "" OFF)
|
||||
option(SPIRV_CROSS_ENABLE_HLSL "" OFF)
|
||||
option(SPIRV_CROSS_ENABLE_CPP "" OFF)
|
||||
option(SPIRV_CROSS_SKIP_INSTALL "" ON)
|
||||
add_subdirectory(SPIRV-Cross)
|
||||
|
||||
# Common
|
||||
set(MVK_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/Common)
|
||||
file(GLOB_RECURSE MVK_COMMON_SOURCES CONFIGURE_DEPENDS
|
||||
${MVK_COMMON_DIR}/*.cpp
|
||||
${MVK_COMMON_DIR}/*.m
|
||||
${MVK_COMMON_DIR}/*.mm)
|
||||
set(MVK_COMMON_INCLUDES ${MVK_COMMON_DIR})
|
||||
|
||||
add_library(MoltenVKCommon STATIC ${MVK_COMMON_SOURCES})
|
||||
target_include_directories(MoltenVKCommon PUBLIC ${MVK_COMMON_INCLUDES})
|
||||
target_compile_options(MoltenVKCommon PRIVATE -w)
|
||||
|
||||
# MoltenVKShaderConverter
|
||||
set(MVK_SHADER_CONVERTER_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVKShaderConverter)
|
||||
file(GLOB_RECURSE MVK_SHADER_CONVERTER_SOURCES CONFIGURE_DEPENDS
|
||||
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.cpp
|
||||
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.m
|
||||
${MVK_SHADER_CONVERTER_DIR}/MoltenVKShaderConverter/*.mm)
|
||||
set(MVK_SHADER_CONVERTER_INCLUDES ${MVK_SHADER_CONVERTER_DIR} ${MVK_SHADER_CONVERTER_DIR}/include)
|
||||
|
||||
add_library(MoltenVKShaderConverter STATIC ${MVK_SHADER_CONVERTER_SOURCES})
|
||||
target_include_directories(MoltenVKShaderConverter PUBLIC ${MVK_SHADER_CONVERTER_INCLUDES})
|
||||
target_compile_options(MoltenVKShaderConverter PRIVATE -w)
|
||||
target_link_libraries(MoltenVKShaderConverter PRIVATE spirv-cross-msl spirv-cross-reflect MoltenVKCommon)
|
||||
target_compile_definitions(MoltenVKShaderConverter PRIVATE MVK_EXCLUDE_SPIRV_TOOLS=1)
|
||||
|
||||
# MoltenVK
|
||||
set(MVK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/MoltenVK)
|
||||
file(GLOB_RECURSE MVK_SOURCES CONFIGURE_DEPENDS
|
||||
${MVK_DIR}/MoltenVK/*.cpp
|
||||
${MVK_DIR}/MoltenVK/*.m
|
||||
${MVK_DIR}/MoltenVK/*.mm)
|
||||
file(GLOB MVK_SRC_INCLUDES LIST_DIRECTORIES ON ${MVK_DIR}/MoltenVK/*)
|
||||
set(MVK_INCLUDES ${MVK_SRC_INCLUDES} ${MVK_GENERATED_INCLUDES} ${MVK_DIR}/include)
|
||||
|
||||
add_library(MoltenVK SHARED ${MVK_SOURCES})
|
||||
target_include_directories(MoltenVK PRIVATE ${MVK_INCLUDES})
|
||||
target_compile_options(MoltenVK PRIVATE -w)
|
||||
target_link_libraries(MoltenVK PRIVATE
|
||||
${APPKIT_LIBRARY} ${FOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${IOSURFACE_LIBRARY} ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY}
|
||||
Vulkan::Headers cereal::cereal spirv-cross-msl MoltenVKCommon MoltenVKShaderConverter)
|
||||
target_compile_definitions(MoltenVK PRIVATE MVK_FRAMEWORK_VERSION=${MVK_VERSION} MVK_USE_METAL_PRIVATE_API=1)
|
1
externals/MoltenVK/MoltenVK
vendored
Submodule
1
externals/MoltenVK/MoltenVK
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932
|
8
externals/MoltenVK/MoltenVK_icd.json
vendored
Normal file
8
externals/MoltenVK/MoltenVK_icd.json
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"file_format_version": "1.0.0",
|
||||
"ICD": {
|
||||
"library_path": "../../../Frameworks/libMoltenVK.dylib",
|
||||
"api_version": "1.2.0",
|
||||
"is_portability_driver": true
|
||||
}
|
||||
}
|
1
externals/MoltenVK/SPIRV-Cross
vendored
Submodule
1
externals/MoltenVK/SPIRV-Cross
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 6173e24b31f09a0c3217103a130e74c4ddec14a6
|
1
externals/MoltenVK/cereal
vendored
Submodule
1
externals/MoltenVK/cereal
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit d1fcec807b372f04e4c1041b3058e11c12853e6e
|
2
externals/sirit
vendored
2
externals/sirit
vendored
@ -1 +1 @@
|
||||
Subproject commit e12b6b592ce9917a85303c555259488643c56f47
|
||||
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
|
@ -692,6 +692,7 @@ void save(const std::filesystem::path& path) {
|
||||
std::ofstream file(path, std::ios::binary);
|
||||
file << data;
|
||||
file.close();
|
||||
saveMainWindow(path);
|
||||
}
|
||||
|
||||
void saveMainWindow(const std::filesystem::path& path) {
|
||||
|
@ -17,6 +17,8 @@ static inline bool IsProfilerConnected() {
|
||||
return tracy::GetProfiler().IsConnected();
|
||||
}
|
||||
|
||||
#define TRACY_GPU_ENABLED 0
|
||||
|
||||
#define CUSTOM_LOCK(type, varname) \
|
||||
tracy::LockableCtx varname { \
|
||||
[]() -> const tracy::SourceLocationData* { \
|
||||
@ -57,3 +59,11 @@ enum MarkersPalette : int {
|
||||
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
|
||||
|
||||
#define FRAME_END FrameMark
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
#define FIBER_ENTER(name) TracyFiberEnter(name)
|
||||
#define FIBER_EXIT TracyFiberLeave
|
||||
#else
|
||||
#define FIBER_ENTER(name)
|
||||
#define FIBER_EXIT
|
||||
#endif
|
||||
|
@ -142,45 +142,66 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
|
||||
frame.queues.push_back(std::move(dump));
|
||||
}
|
||||
|
||||
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
|
||||
const auto it = waiting_reg_dumps.find(header_addr);
|
||||
if (it == waiting_reg_dumps.end()) {
|
||||
return;
|
||||
return std::nullopt;
|
||||
}
|
||||
auto& frame = *it->second;
|
||||
waiting_reg_dumps.erase(it);
|
||||
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
|
||||
auto& dump = frame.regs[header_addr - base_addr];
|
||||
dump.regs = regs;
|
||||
if (is_compute) {
|
||||
dump.is_compute = true;
|
||||
const auto& cs = dump.regs.cs_program;
|
||||
dump.cs_data = PipelineComputerProgramDump{
|
||||
.cs_program = cs,
|
||||
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
||||
};
|
||||
} else {
|
||||
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
||||
if (regs.stage_enable.IsStageEnabled(i)) {
|
||||
auto stage = regs.ProgramForStage(i);
|
||||
if (stage->address_lo != 0) {
|
||||
auto code = stage->Code();
|
||||
dump.stages[i] = PipelineShaderProgramDump{
|
||||
.user_data = *stage,
|
||||
.code = std::vector<u32>{code.begin(), code.end()},
|
||||
};
|
||||
}
|
||||
return &frame.regs[header_addr - base_addr];
|
||||
}
|
||||
|
||||
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
|
||||
auto dump = GetRegDump(base_addr, header_addr);
|
||||
if (!dump) {
|
||||
return;
|
||||
}
|
||||
|
||||
(*dump)->regs = regs;
|
||||
|
||||
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
||||
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
|
||||
auto stage = (*dump)->regs.ProgramForStage(i);
|
||||
if (stage->address_lo != 0) {
|
||||
auto code = stage->Code();
|
||||
(*dump)->stages[i] = PipelineShaderProgramDump{
|
||||
.user_data = *stage,
|
||||
.code = std::vector<u32>{code.begin(), code.end()},
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module,
|
||||
std::span<const u32> spv, std::span<const u32> raw_code,
|
||||
std::span<const u32> patch_spv, bool is_patched) {
|
||||
shader_dump_list.emplace_back(name, module, std::vector<u32>{spv.begin(), spv.end()},
|
||||
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const CsState& cs_state) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
|
||||
auto dump = GetRegDump(base_addr, header_addr);
|
||||
if (!dump) {
|
||||
return;
|
||||
}
|
||||
|
||||
(*dump)->is_compute = true;
|
||||
auto& cs = (*dump)->regs.cs_program;
|
||||
cs = cs_state;
|
||||
|
||||
(*dump)->cs_data = PipelineComputerProgramDump{
|
||||
.cs_program = cs,
|
||||
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
||||
};
|
||||
}
|
||||
|
||||
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||
vk::ShaderModule module, std::span<const u32> spv,
|
||||
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||
bool is_patched) {
|
||||
shader_dump_list.emplace_back(name, l_stage, module, std::vector<u32>{spv.begin(), spv.end()},
|
||||
std::vector<u32>{raw_code.begin(), raw_code.end()},
|
||||
std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
|
||||
}
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <queue>
|
||||
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -76,6 +75,7 @@ struct FrameDump {
|
||||
|
||||
struct ShaderDump {
|
||||
std::string name;
|
||||
Shader::LogicalStage l_stage;
|
||||
vk::ShaderModule module;
|
||||
|
||||
std::vector<u32> spv;
|
||||
@ -90,16 +90,17 @@ struct ShaderDump {
|
||||
std::string cache_isa_disasm{};
|
||||
std::string cache_patch_disasm{};
|
||||
|
||||
ShaderDump(std::string name, vk::ShaderModule module, std::vector<u32> spv,
|
||||
std::vector<u32> isa, std::vector<u32> patch_spv, bool is_patched)
|
||||
: name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)),
|
||||
patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
|
||||
ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module,
|
||||
std::vector<u32> spv, std::vector<u32> isa, std::vector<u32> patch_spv,
|
||||
bool is_patched)
|
||||
: name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)),
|
||||
isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
|
||||
|
||||
ShaderDump(const ShaderDump& other) = delete;
|
||||
ShaderDump(ShaderDump&& other) noexcept
|
||||
: name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)},
|
||||
isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)},
|
||||
patch_source{std::move(other.patch_source)},
|
||||
: name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)},
|
||||
spv{std::move(other.spv)}, isa{std::move(other.isa)},
|
||||
patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)},
|
||||
cache_spv_disasm{std::move(other.cache_spv_disasm)},
|
||||
cache_isa_disasm{std::move(other.cache_isa_disasm)},
|
||||
cache_patch_disasm{std::move(other.cache_patch_disasm)} {}
|
||||
@ -108,6 +109,7 @@ struct ShaderDump {
|
||||
if (this == &other)
|
||||
return *this;
|
||||
name = std::move(other.name);
|
||||
l_stage = other.l_stage;
|
||||
module = std::move(other.module);
|
||||
spv = std::move(other.spv);
|
||||
isa = std::move(other.isa);
|
||||
@ -201,11 +203,17 @@ public:
|
||||
void PushQueueDump(QueueDump dump);
|
||||
|
||||
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
|
||||
const AmdGpu::Liverpool::Regs& regs);
|
||||
using CsState = AmdGpu::Liverpool::ComputeProgram;
|
||||
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
|
||||
|
||||
void CollectShader(const std::string& name, vk::ShaderModule module, std::span<const u32> spv,
|
||||
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||
vk::ShaderModule module, std::span<const u32> spv,
|
||||
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||
bool is_patched);
|
||||
|
||||
private:
|
||||
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
|
||||
};
|
||||
} // namespace DebugStateType
|
||||
|
||||
|
@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) {
|
||||
DebugState.ShowDebugMessage(msg);
|
||||
}
|
||||
if (compile) {
|
||||
static std::map<std::string, std::string> stage_arg = {
|
||||
{"vs", "vert"},
|
||||
{"gs", "geom"},
|
||||
{"fs", "frag"},
|
||||
{"cs", "comp"},
|
||||
static std::map<Shader::LogicalStage, std::string> stage_arg = {
|
||||
{Shader::LogicalStage::Vertex, "vert"},
|
||||
{Shader::LogicalStage::TessellationControl, "tesc"},
|
||||
{Shader::LogicalStage::TessellationEval, "tese"},
|
||||
{Shader::LogicalStage::Geometry, "geom"},
|
||||
{Shader::LogicalStage::Fragment, "frag"},
|
||||
{Shader::LogicalStage::Compute, "comp"},
|
||||
};
|
||||
auto stage = stage_arg.find(value.name.substr(0, 2));
|
||||
auto stage = stage_arg.find(value.l_stage);
|
||||
if (stage == stage_arg.end()) {
|
||||
DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} +
|
||||
value.name.substr(0, 2));
|
||||
DebugState.ShowDebugMessage(std::string{"Invalid shader stage"});
|
||||
} else {
|
||||
std::string cmd =
|
||||
fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 "
|
||||
|
@ -10,16 +10,28 @@
|
||||
|
||||
namespace Core::FileSys {
|
||||
|
||||
std::string RemoveTrailingSlashes(const std::string& path) {
|
||||
// Remove trailing slashes to make comparisons simpler.
|
||||
std::string path_sanitized = path;
|
||||
while (path_sanitized.ends_with("/")) {
|
||||
path_sanitized.pop_back();
|
||||
}
|
||||
return path_sanitized;
|
||||
}
|
||||
|
||||
void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder,
|
||||
bool read_only) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
m_mnt_pairs.emplace_back(host_folder, guest_folder, read_only);
|
||||
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
|
||||
m_mnt_pairs.emplace_back(host_folder, guest_folder_sanitized, read_only);
|
||||
}
|
||||
|
||||
void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(),
|
||||
[&](const MntPair& pair) { return pair.mount == guest_folder; });
|
||||
const auto guest_folder_sanitized = RemoveTrailingSlashes(guest_folder);
|
||||
auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), [&](const MntPair& pair) {
|
||||
return pair.mount == guest_folder_sanitized;
|
||||
});
|
||||
m_mnt_pairs.erase(it, m_mnt_pairs.end());
|
||||
}
|
||||
|
||||
@ -47,7 +59,8 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
|
||||
}
|
||||
|
||||
// Nothing to do if getting the mount itself.
|
||||
if (corrected_path == mount->mount) {
|
||||
const auto corrected_path_sanitized = RemoveTrailingSlashes(corrected_path);
|
||||
if (corrected_path_sanitized == mount->mount) {
|
||||
return mount->host_path;
|
||||
}
|
||||
|
||||
@ -186,4 +199,14 @@ void HandleTable::CreateStdHandles() {
|
||||
setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr
|
||||
}
|
||||
|
||||
int HandleTable::GetFileDescriptor(File* file) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
auto it = std::find(m_files.begin(), m_files.end(), file);
|
||||
|
||||
if (it != m_files.end()) {
|
||||
return std::distance(m_files.begin(), it);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace Core::FileSys
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <vector>
|
||||
#include <tsl/robin_map.h>
|
||||
#include "common/io_file.h"
|
||||
#include "common/logging/formatter.h"
|
||||
#include "core/devices/base_device.h"
|
||||
|
||||
namespace Core::FileSys {
|
||||
@ -22,7 +23,7 @@ class MntPoints {
|
||||
public:
|
||||
struct MntPair {
|
||||
std::filesystem::path host_path;
|
||||
std::string mount; // e.g /app0/
|
||||
std::string mount; // e.g /app0
|
||||
bool read_only;
|
||||
};
|
||||
|
||||
@ -37,10 +38,21 @@ public:
|
||||
std::filesystem::path GetHostPath(std::string_view guest_directory,
|
||||
bool* is_read_only = nullptr);
|
||||
|
||||
const MntPair* GetMountFromHostPath(const std::string& host_path) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
const auto it = std::ranges::find_if(m_mnt_pairs, [&](const MntPair& mount) {
|
||||
return host_path.starts_with(std::string{fmt::UTF(mount.host_path.u8string()).data});
|
||||
});
|
||||
return it == m_mnt_pairs.end() ? nullptr : &*it;
|
||||
}
|
||||
|
||||
const MntPair* GetMount(const std::string& guest_path) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
const auto it = std::ranges::find_if(
|
||||
m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); });
|
||||
const auto it = std::ranges::find_if(m_mnt_pairs, [&](const auto& mount) {
|
||||
// When doing starts-with check, add a trailing slash to make sure we don't match
|
||||
// against only part of the mount path.
|
||||
return guest_path == mount.mount || guest_path.starts_with(mount.mount + "/");
|
||||
});
|
||||
return it == m_mnt_pairs.end() ? nullptr : &*it;
|
||||
}
|
||||
|
||||
@ -83,6 +95,7 @@ public:
|
||||
void DeleteHandle(int d);
|
||||
File* GetFile(int d);
|
||||
File* GetFile(const std::filesystem::path& host_name);
|
||||
int GetFileDescriptor(File* file);
|
||||
|
||||
void CreateStdHandles();
|
||||
|
||||
|
@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
|
||||
// In case if `submitDone` is issued we need to block submissions until GPU idle
|
||||
static u32 submission_lock{};
|
||||
std::condition_variable cv_lock{};
|
||||
static std::mutex m_submission{};
|
||||
std::mutex m_submission{};
|
||||
static u64 frames_submitted{}; // frame counter
|
||||
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
||||
static int sdk_version{0};
|
||||
|
||||
struct AscQueueInfo {
|
||||
VAddr map_addr;
|
||||
u32* read_addr;
|
||||
u32 ring_size_dw;
|
||||
};
|
||||
static Common::SlotVector<AscQueueInfo> asc_queues{};
|
||||
static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
|
||||
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
|
||||
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;
|
||||
|
||||
@ -493,6 +488,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
|
||||
}
|
||||
|
||||
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||
HLE_TRACE;
|
||||
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
|
||||
|
||||
if (gnm_vqid == 0) {
|
||||
@ -506,11 +502,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||
}
|
||||
|
||||
auto vqid = gnm_vqid - 1;
|
||||
auto& asc_queue = asc_queues[{vqid}];
|
||||
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
|
||||
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
|
||||
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
|
||||
const std::span acb_span{acb_ptr, acb_size >> 2u};
|
||||
auto& asc_queue = liverpool->asc_queues[{vqid}];
|
||||
|
||||
const auto& offs_dw = asc_next_offs_dw[vqid];
|
||||
|
||||
if (next_offs_dw < offs_dw) {
|
||||
ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
|
||||
}
|
||||
|
||||
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
|
||||
const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
|
||||
const std::span acb_span{acb_ptr, acb_size_dw};
|
||||
|
||||
asc_next_offs_dw[vqid] = next_offs_dw;
|
||||
|
||||
if (DebugState.DumpingCurrentFrame()) {
|
||||
static auto last_frame_num = -1LL;
|
||||
@ -545,9 +549,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||
});
|
||||
}
|
||||
liverpool->SubmitAsc(gnm_vqid, acb_span);
|
||||
|
||||
*asc_queue.read_addr += acb_size;
|
||||
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
|
||||
}
|
||||
|
||||
void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
|
||||
@ -1266,12 +1267,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
|
||||
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
|
||||
}
|
||||
|
||||
auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
|
||||
const auto vqid =
|
||||
liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
|
||||
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
|
||||
const auto gnm_vqid = vqid.index + 1;
|
||||
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
|
||||
gnm_vqid);
|
||||
|
||||
const auto& queue = liverpool->asc_queues[vqid];
|
||||
*queue.read_addr = 0u;
|
||||
|
||||
return gnm_vqid;
|
||||
}
|
||||
|
||||
@ -1642,7 +1647,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
|
||||
|
||||
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (!cmdbuf || size < 0x1E) {
|
||||
return -1;
|
||||
}
|
||||
@ -1660,11 +1664,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
|
||||
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
|
||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
|
||||
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
|
||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
|
||||
hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL
|
||||
hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
|
||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
|
||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
|
||||
|
||||
// right padding?
|
||||
WriteTrailingNop<11>(cmdbuf);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
@ -2161,6 +2167,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
u32* dcb_sizes_in_bytes,
|
||||
const u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes) {
|
||||
HLE_TRACE;
|
||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||
|
||||
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
||||
@ -2253,6 +2260,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||
HLE_TRACE;
|
||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||
WaitGpuIdle();
|
||||
if (!liverpool->IsGpuIdle()) {
|
||||
|
@ -695,12 +695,66 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||
return sizeof(OrbisKernelDirent);
|
||||
}
|
||||
|
||||
static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||
int dir_entries = 0;
|
||||
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
auto* file = h->GetFile(fd);
|
||||
auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data};
|
||||
auto mount = mnt->GetMountFromHostPath(update_dir_name);
|
||||
auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data};
|
||||
|
||||
size_t pos = update_dir_name.find("-UPDATE");
|
||||
if (pos != std::string::npos) {
|
||||
update_dir_name.erase(pos, 7);
|
||||
auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1);
|
||||
int descriptor;
|
||||
|
||||
auto existent_folder = h->GetFile(update_dir_name);
|
||||
if (!existent_folder) {
|
||||
u32 handle = h->CreateHandle();
|
||||
auto* new_file = h->GetFile(handle);
|
||||
new_file->type = Core::FileSys::FileType::Directory;
|
||||
new_file->m_guest_name = guest_name;
|
||||
new_file->m_host_name = update_dir_name;
|
||||
if (!std::filesystem::is_directory(new_file->m_host_name)) {
|
||||
h->DeleteHandle(handle);
|
||||
return dir_entries;
|
||||
} else {
|
||||
new_file->dirents = GetDirectoryEntries(new_file->m_host_name);
|
||||
new_file->dirents_index = 0;
|
||||
}
|
||||
new_file->is_opened = true;
|
||||
descriptor = h->GetFileDescriptor(new_file);
|
||||
} else {
|
||||
descriptor = h->GetFileDescriptor(existent_folder);
|
||||
}
|
||||
|
||||
dir_entries = GetDents(descriptor, buf, nbytes, basep);
|
||||
if (dir_entries == ORBIS_OK && existent_folder) {
|
||||
existent_folder->dirents_index = 0;
|
||||
file->dirents_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return dir_entries;
|
||||
}
|
||||
|
||||
// Reads directory entries for `fd` into `buf` (at most `nbytes` bytes) via GetDents.
// When GetDents reports ORBIS_OK, the request is passed to HandleSeparateUpdateDents
// so that a listing of a separate "-UPDATE" folder can continue with the base-game
// folder. Any other GetDents result is returned unchanged.
// NOTE(review): presumably ORBIS_OK here means the current directory has no further
// entries - confirm against GetDents.
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
    const int result = GetDents(fd, buf, nbytes, nullptr);
    if (result == ORBIS_OK) {
        return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr);
    }
    return result;
}
|
||||
|
||||
// Reads directory entries for `fd` into `buf` (at most `nbytes` bytes) via GetDents,
// forwarding `basep` to it. When GetDents reports ORBIS_OK, the request is passed to
// HandleSeparateUpdateDents so that a listing of a separate "-UPDATE" folder can
// continue with the base-game folder. Any other GetDents result is returned unchanged.
// NOTE(review): presumably ORBIS_OK here means the current directory has no further
// entries - confirm against GetDents.
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
    const int result = GetDents(fd, buf, nbytes, basep);
    if (result == ORBIS_OK) {
        return HandleSeparateUpdateDents(fd, buf, nbytes, basep);
    }
    return result;
}
|
||||
|
||||
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
||||
|
@ -327,7 +327,7 @@ void PS4_SYSV_ABI sched_yield() {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void (*init_routine)()) {
|
||||
int PS4_SYSV_ABI posix_pthread_once(PthreadOnce* once_control, void PS4_SYSV_ABI (*init_routine)()) {
|
||||
for (;;) {
|
||||
auto state = once_control->state.load();
|
||||
if (state == PthreadOnceState::Done) {
|
||||
|
@ -38,21 +38,22 @@ void TrophyUI::Finish() {
|
||||
void TrophyUI::Draw() {
|
||||
const auto& io = GetIO();
|
||||
|
||||
float AdjustWidth = io.DisplaySize.x / 1280;
|
||||
float AdjustHeight = io.DisplaySize.y / 720;
|
||||
const ImVec2 window_size{
|
||||
std::min(io.DisplaySize.x, 250.f),
|
||||
std::min(io.DisplaySize.y, 70.f),
|
||||
std::min(io.DisplaySize.x, (300 * AdjustWidth)),
|
||||
std::min(io.DisplaySize.y, (70 * AdjustHeight)),
|
||||
};
|
||||
|
||||
SetNextWindowSize(window_size);
|
||||
SetNextWindowCollapsed(false);
|
||||
SetNextWindowPos(ImVec2(io.DisplaySize.x - 250, 50));
|
||||
SetNextWindowPos(ImVec2(io.DisplaySize.x - (300 * AdjustWidth), (50 * AdjustHeight)));
|
||||
KeepNavHighlight();
|
||||
|
||||
if (Begin("Trophy Window", nullptr,
|
||||
ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings |
|
||||
ImGuiWindowFlags_NoInputs)) {
|
||||
if (trophy_icon) {
|
||||
Image(trophy_icon.GetTexture().im_id, ImVec2(50, 50));
|
||||
Image(trophy_icon.GetTexture().im_id, ImVec2((50 * AdjustWidth), (50 * AdjustHeight)));
|
||||
ImGui::SameLine();
|
||||
} else {
|
||||
// placeholder
|
||||
@ -61,6 +62,7 @@ void TrophyUI::Draw() {
|
||||
GetColorU32(ImVec4{0.7f}));
|
||||
ImGui::Indent(60);
|
||||
}
|
||||
SetWindowFontScale((1.2 * AdjustHeight));
|
||||
TextWrapped("Trophy earned!\n%s", trophy_name.c_str());
|
||||
}
|
||||
End();
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <set>
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "common/config.h"
|
||||
@ -106,9 +107,11 @@ Emulator::~Emulator() {
|
||||
void Emulator::Run(const std::filesystem::path& file) {
|
||||
|
||||
// Use the eboot from the separated updates folder if it's there
|
||||
std::filesystem::path game_patch_folder = file.parent_path().concat("-UPDATE");
|
||||
bool use_game_patch = std::filesystem::exists(game_patch_folder / "sce_sys");
|
||||
std::filesystem::path eboot_path = use_game_patch ? game_patch_folder / file.filename() : file;
|
||||
std::filesystem::path game_patch_folder = file.parent_path();
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
|
||||
? game_patch_folder / file.filename()
|
||||
: file;
|
||||
|
||||
// Applications expect to be run from /app0 so mount the file's parent path as app0.
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
@ -226,20 +229,37 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
LoadSystemModules(eboot_path, game_info.game_serial);
|
||||
|
||||
// Load all prx from game's sce_module folder
|
||||
std::filesystem::path sce_module_folder = file.parent_path() / "sce_module";
|
||||
if (std::filesystem::is_directory(sce_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) {
|
||||
std::filesystem::path module_path = entry.path();
|
||||
std::filesystem::path update_module_path =
|
||||
eboot_path.parent_path() / "sce_module" / entry.path().filename();
|
||||
if (std::filesystem::exists(update_module_path) && use_game_patch) {
|
||||
module_path = update_module_path;
|
||||
std::vector<std::filesystem::path> modules_to_load;
|
||||
std::filesystem::path game_module_folder = file.parent_path() / "sce_module";
|
||||
if (std::filesystem::is_directory(game_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) {
|
||||
if (entry.is_regular_file()) {
|
||||
modules_to_load.push_back(entry.path());
|
||||
}
|
||||
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
|
||||
linker->LoadModule(module_path);
|
||||
}
|
||||
}
|
||||
|
||||
// Load all prx from separate update's sce_module folder
|
||||
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
|
||||
if (std::filesystem::is_directory(update_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
|
||||
auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(),
|
||||
[&entry](const std::filesystem::path& p) {
|
||||
return p.filename() == entry.path().filename();
|
||||
});
|
||||
if (it != modules_to_load.end()) {
|
||||
*it = entry.path();
|
||||
} else {
|
||||
modules_to_load.push_back(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& module_path : modules_to_load) {
|
||||
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
|
||||
linker->LoadModule(module_path);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DISCORD_RPC
|
||||
// Discord RPC
|
||||
if (Config::getEnableDiscordRPC()) {
|
||||
|
@ -123,7 +123,7 @@ void GameListFrame::PopulateGameList() {
|
||||
formattedPlayTime = formattedPlayTime.trimmed();
|
||||
m_game_info->m_games[i].play_time = playTime.toStdString();
|
||||
if (formattedPlayTime.isEmpty()) {
|
||||
SetTableItem(i, 7, "0");
|
||||
SetTableItem(i, 7, QString("%1s").arg(seconds));
|
||||
} else {
|
||||
SetTableItem(i, 7, formattedPlayTime);
|
||||
}
|
||||
|
@ -122,11 +122,11 @@ public:
|
||||
|
||||
if (selected == &openSfoViewer) {
|
||||
PSF psf;
|
||||
QString game_update_path;
|
||||
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
|
||||
std::filesystem::path game_folder_path = m_games[itemID].path;
|
||||
if (std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
|
||||
game_folder_path = Common::FS::PathFromQString(game_update_path);
|
||||
std::filesystem::path game_update_path = game_folder_path;
|
||||
game_update_path += "UPDATE";
|
||||
if (std::filesystem::exists(game_update_path)) {
|
||||
game_folder_path = game_update_path;
|
||||
}
|
||||
if (psf.Open(game_folder_path / "sce_sys" / "param.sfo")) {
|
||||
int rows = psf.GetEntries().size();
|
||||
@ -320,21 +320,17 @@ public:
|
||||
bool error = false;
|
||||
QString folder_path, game_update_path, dlc_path;
|
||||
Common::FS::PathToQString(folder_path, m_games[itemID].path);
|
||||
Common::FS::PathToQString(game_update_path, m_games[itemID].path.concat("-UPDATE"));
|
||||
game_update_path = folder_path + "-UPDATE";
|
||||
Common::FS::PathToQString(
|
||||
dlc_path, Config::getAddonInstallDir() /
|
||||
Common::FS::PathFromQString(folder_path).parent_path().filename());
|
||||
QString message_type = tr("Game");
|
||||
|
||||
if (selected == deleteUpdate) {
|
||||
if (!Config::getSeparateUpdateEnabled()) {
|
||||
QMessageBox::critical(nullptr, tr("Error"),
|
||||
QString(tr("requiresEnableSeparateUpdateFolder_MSG")));
|
||||
error = true;
|
||||
} else if (!std::filesystem::exists(
|
||||
Common::FS::PathFromQString(game_update_path))) {
|
||||
QMessageBox::critical(nullptr, tr("Error"),
|
||||
QString(tr("This game has no update to delete!")));
|
||||
if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) {
|
||||
QMessageBox::critical(
|
||||
nullptr, tr("Error"),
|
||||
QString(tr("This game has no separate update to delete!")));
|
||||
error = true;
|
||||
} else {
|
||||
folder_path = game_update_path;
|
||||
|
@ -115,6 +115,7 @@ void MainWindow::CreateActions() {
|
||||
m_theme_act_group->addAction(ui->setThemeGreen);
|
||||
m_theme_act_group->addAction(ui->setThemeBlue);
|
||||
m_theme_act_group->addAction(ui->setThemeViolet);
|
||||
m_theme_act_group->addAction(ui->setThemeGruvbox);
|
||||
}
|
||||
|
||||
void MainWindow::AddUiWidgets() {
|
||||
@ -550,6 +551,14 @@ void MainWindow::CreateConnects() {
|
||||
isIconBlack = false;
|
||||
}
|
||||
});
|
||||
connect(ui->setThemeGruvbox, &QAction::triggered, &m_window_themes, [this]() {
|
||||
m_window_themes.SetWindowTheme(Theme::Gruvbox, ui->mw_searchbar);
|
||||
Config::setMainWindowTheme(static_cast<int>(Theme::Gruvbox));
|
||||
if (isIconBlack) {
|
||||
SetUiIcons(false);
|
||||
isIconBlack = false;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void MainWindow::StartGame() {
|
||||
@ -923,6 +932,11 @@ void MainWindow::SetLastUsedTheme() {
|
||||
isIconBlack = false;
|
||||
SetUiIcons(false);
|
||||
break;
|
||||
case Theme::Gruvbox:
|
||||
ui->setThemeGruvbox->setChecked(true);
|
||||
isIconBlack = false;
|
||||
SetUiIcons(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,14 +8,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
|
||||
switch (theme) {
|
||||
case Theme::Dark:
|
||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
||||
"color: #ffffff;" // White text
|
||||
"border: 2px solid #ffffff;" // White border
|
||||
"padding: 5px;");
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #1e1e1e; color: #ffffff; border: 1px solid #ffffff; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #2A82DA; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(50, 50, 50));
|
||||
themePalette.setColor(QPalette::WindowText, Qt::white);
|
||||
themePalette.setColor(QPalette::Base, QColor(20, 20, 20));
|
||||
themePalette.setColor(QPalette::AlternateBase, QColor(25, 25, 25));
|
||||
themePalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53));
|
||||
themePalette.setColor(QPalette::ToolTipBase, Qt::white);
|
||||
themePalette.setColor(QPalette::ToolTipText, Qt::white);
|
||||
@ -28,12 +29,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
themePalette.setColor(QPalette::HighlightedText, Qt::black);
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
|
||||
case Theme::Light:
|
||||
mw_searchbar->setStyleSheet("background-color: #ffffff;" // Light gray background
|
||||
"color: #000000;" // Black text
|
||||
"border: 2px solid #000000;" // Black border
|
||||
"padding: 5px;");
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #ffffff; color: #000000; border: 1px solid #000000; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #2A82DA; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(240, 240, 240)); // Light gray
|
||||
themePalette.setColor(QPalette::WindowText, Qt::black); // Black
|
||||
themePalette.setColor(QPalette::Base, QColor(230, 230, 230, 80)); // Grayish
|
||||
@ -48,12 +50,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
themePalette.setColor(QPalette::HighlightedText, Qt::white); // White
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
|
||||
case Theme::Green:
|
||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
||||
"color: #ffffff;" // White text
|
||||
"border: 2px solid #ffffff;" // White border
|
||||
"padding: 5px;");
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #192819; color: #ffffff; border: 1px solid #ffffff; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #2A82DA; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(53, 69, 53)); // Dark green background
|
||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||
themePalette.setColor(QPalette::Base, QColor(25, 40, 25)); // Darker green base
|
||||
@ -68,15 +71,15 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
themePalette.setColor(QPalette::Link, QColor(42, 130, 218)); // Light blue links
|
||||
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
||||
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
||||
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
|
||||
case Theme::Blue:
|
||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
||||
"color: #ffffff;" // White text
|
||||
"border: 2px solid #ffffff;" // White border
|
||||
"padding: 5px;");
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #14283c; color: #ffffff; border: 1px solid #ffffff; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #2A82DA; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(40, 60, 90)); // Dark blue background
|
||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||
themePalette.setColor(QPalette::Base, QColor(20, 40, 60)); // Darker blue base
|
||||
@ -94,12 +97,13 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
|
||||
case Theme::Violet:
|
||||
mw_searchbar->setStyleSheet("background-color: #1e1e1e;" // Dark background
|
||||
"color: #ffffff;" // White text
|
||||
"border: 2px solid #ffffff;" // White border
|
||||
"padding: 5px;");
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #501e5a; color: #ffffff; border: 1px solid #ffffff; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #2A82DA; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(100, 50, 120)); // Violet background
|
||||
themePalette.setColor(QPalette::WindowText, Qt::white); // White text
|
||||
themePalette.setColor(QPalette::Base, QColor(80, 30, 90)); // Darker violet base
|
||||
@ -115,6 +119,28 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) {
|
||||
themePalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); // Light blue highlight
|
||||
themePalette.setColor(QPalette::HighlightedText, Qt::black); // Black highlighted text
|
||||
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
case Theme::Gruvbox:
|
||||
mw_searchbar->setStyleSheet(
|
||||
"QLineEdit {"
|
||||
"background-color: #1d2021; color: #f9f5d7; border: 1px solid #f9f5d7; "
|
||||
"border-radius: 4px; padding: 5px; }"
|
||||
"QLineEdit:focus {"
|
||||
"border: 1px solid #83A598; }");
|
||||
themePalette.setColor(QPalette::Window, QColor(29, 32, 33));
|
||||
themePalette.setColor(QPalette::WindowText, QColor(249, 245, 215));
|
||||
themePalette.setColor(QPalette::Base, QColor(29, 32, 33));
|
||||
themePalette.setColor(QPalette::AlternateBase, QColor(50, 48, 47));
|
||||
themePalette.setColor(QPalette::ToolTipBase, QColor(249, 245, 215));
|
||||
themePalette.setColor(QPalette::ToolTipText, QColor(249, 245, 215));
|
||||
themePalette.setColor(QPalette::Text, QColor(249, 245, 215));
|
||||
themePalette.setColor(QPalette::Button, QColor(40, 40, 40));
|
||||
themePalette.setColor(QPalette::ButtonText, QColor(249, 245, 215));
|
||||
themePalette.setColor(QPalette::BrightText, QColor(251, 73, 52));
|
||||
themePalette.setColor(QPalette::Link, QColor(131, 165, 152));
|
||||
themePalette.setColor(QPalette::Highlight, QColor(131, 165, 152));
|
||||
themePalette.setColor(QPalette::HighlightedText, Qt::black);
|
||||
qApp->setPalette(themePalette);
|
||||
break;
|
||||
}
|
||||
|
@ -7,13 +7,7 @@
|
||||
#include <QLineEdit>
|
||||
#include <QWidget>
|
||||
|
||||
// Color themes selectable for the main window.
// Values are cast to int when the chosen theme is stored in the configuration, so new
// themes must be appended at the end to keep existing stored values meaningful.
enum class Theme : int {
    Dark,
    Light,
    Green,
    Blue,
    Violet,
    Gruvbox,
};
|
||||
|
||||
class WindowThemes : public QObject {
|
||||
Q_OBJECT
|
||||
|
@ -36,6 +36,7 @@ public:
|
||||
QAction* setThemeGreen;
|
||||
QAction* setThemeBlue;
|
||||
QAction* setThemeViolet;
|
||||
QAction* setThemeGruvbox;
|
||||
QWidget* centralWidget;
|
||||
QLineEdit* mw_searchbar;
|
||||
QPushButton* playButton;
|
||||
@ -158,6 +159,9 @@ public:
|
||||
setThemeViolet = new QAction(MainWindow);
|
||||
setThemeViolet->setObjectName("setThemeViolet");
|
||||
setThemeViolet->setCheckable(true);
|
||||
setThemeGruvbox = new QAction(MainWindow);
|
||||
setThemeGruvbox->setObjectName("setThemeGruvbox");
|
||||
setThemeGruvbox->setCheckable(true);
|
||||
centralWidget = new QWidget(MainWindow);
|
||||
centralWidget->setObjectName("centralWidget");
|
||||
sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth());
|
||||
@ -282,6 +286,7 @@ public:
|
||||
menuThemes->addAction(setThemeGreen);
|
||||
menuThemes->addAction(setThemeBlue);
|
||||
menuThemes->addAction(setThemeViolet);
|
||||
menuThemes->addAction(setThemeGruvbox);
|
||||
menuGame_List_Icons->addAction(setIconSizeTinyAct);
|
||||
menuGame_List_Icons->addAction(setIconSizeSmallAct);
|
||||
menuGame_List_Icons->addAction(setIconSizeMediumAct);
|
||||
@ -368,6 +373,7 @@ public:
|
||||
setThemeGreen->setText(QCoreApplication::translate("MainWindow", "Green", nullptr));
|
||||
setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr));
|
||||
setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr));
|
||||
setThemeGruvbox->setText("Gruvbox");
|
||||
toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr));
|
||||
} // retranslateUi
|
||||
};
|
||||
|
@ -1159,7 +1159,7 @@
|
||||
<message>
|
||||
<location filename="../settings_dialog.cpp" line="293"/>
|
||||
<source>separateUpdatesCheckBox</source>
|
||||
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.</translation>
|
||||
<translation>Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management.\nThis can be manually created by adding the extracted update to the game folder with the name "CUSA00000-UPDATE" where the CUSA ID matches the game's ID.</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../settings_dialog.cpp" line="295"/>
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "SDL3/SDL_events.h"
|
||||
#include "SDL3/SDL_hints.h"
|
||||
#include "SDL3/SDL_init.h"
|
||||
#include "SDL3/SDL_properties.h"
|
||||
#include "SDL3/SDL_timer.h"
|
||||
@ -33,6 +34,9 @@ static Uint32 SDLCALL PollController(void* userdata, SDL_TimerID timer_id, Uint3
|
||||
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_,
|
||||
std::string_view window_title)
|
||||
: width{width_}, height{height_}, controller{controller_} {
|
||||
if (!SDL_SetHint(SDL_HINT_APP_NAME, "shadPS4")) {
|
||||
UNREACHABLE_MSG("Failed to set SDL window hint: {}", SDL_GetError());
|
||||
}
|
||||
if (!SDL_Init(SDL_INIT_VIDEO)) {
|
||||
UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError());
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
@ -13,6 +12,7 @@
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
|
||||
return arg.VectorReg();
|
||||
} else if constexpr (std::is_same_v<ArgType, const char*>) {
|
||||
return arg.StringLiteral();
|
||||
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
|
||||
return arg.Patch();
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <auto func, bool is_first_arg_inst, size_t... I>
|
||||
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
|
||||
return main;
|
||||
}
|
||||
|
||||
// Maps a tessellation primitive type to the SPIR-V execution mode that selects the
// same domain. Any value other than Isoline, Triangle or Quad hits UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
    using Type = AmdGpu::TessellationType;
    if (primitive == Type::Isoline) {
        return spv::ExecutionMode::Isolines;
    }
    if (primitive == Type::Triangle) {
        return spv::ExecutionMode::Triangles;
    }
    if (primitive == Type::Quad) {
        return spv::ExecutionMode::Quads;
    }
    UNREACHABLE_MSG("Tessellation primitive {}", primitive);
}
|
||||
|
||||
// Maps a tessellation partitioning setting to the SPIR-V spacing execution mode.
// Any value other than Integer, FracOdd or FracEven hits UNREACHABLE_MSG.
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
    using Partitioning = AmdGpu::TessellationPartitioning;
    if (spacing == Partitioning::Integer) {
        return spv::ExecutionMode::SpacingEqual;
    }
    if (spacing == Partitioning::FracOdd) {
        return spv::ExecutionMode::SpacingFractionalOdd;
    }
    if (spacing == Partitioning::FracEven) {
        return spv::ExecutionMode::SpacingFractionalEven;
    }
    UNREACHABLE_MSG("Tessellation spacing {}", spacing);
}
|
||||
|
||||
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
|
||||
ctx.AddCapability(spv::Capability::Image1D);
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
@ -222,6 +251,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||
if (profile.supports_image_load_store_lod) {
|
||||
ctx.AddExtension("SPV_AMD_shader_image_load_store_lod");
|
||||
ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
|
||||
}
|
||||
}
|
||||
if (info.has_texel_buffers) {
|
||||
ctx.AddCapability(spv::Capability::SampledBuffer);
|
||||
@ -244,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
||||
if (info.uses_group_ballot) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||
}
|
||||
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
|
||||
const auto stage = info.l_stage;
|
||||
if (stage == LogicalStage::Vertex) {
|
||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
}
|
||||
if (info.stage == Stage::Geometry) {
|
||||
if (stage == LogicalStage::Geometry) {
|
||||
ctx.AddCapability(spv::Capability::Geometry);
|
||||
}
|
||||
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
|
||||
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
||||
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
||||
}
|
||||
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
|
||||
ctx.AddCapability(spv::Capability::Tessellation);
|
||||
}
|
||||
}
|
||||
|
||||
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
const auto& info = program.info;
|
||||
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||
spv::ExecutionModel execution_model{};
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
switch (info.l_stage) {
|
||||
case LogicalStage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||
execution_model = spv::ExecutionModel::GLCompute;
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
workgroup_size[1], workgroup_size[2]);
|
||||
break;
|
||||
}
|
||||
case Stage::Export:
|
||||
case Stage::Vertex:
|
||||
case LogicalStage::Vertex:
|
||||
execution_model = spv::ExecutionModel::Vertex;
|
||||
break;
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::TessellationControl:
|
||||
execution_model = spv::ExecutionModel::TessellationControl;
|
||||
ctx.AddCapability(spv::Capability::Tessellation);
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||
ctx.runtime_info.hs_info.NumOutputControlPoints());
|
||||
break;
|
||||
case LogicalStage::TessellationEval: {
|
||||
execution_model = spv::ExecutionModel::TessellationEvaluation;
|
||||
const auto& vs_info = ctx.runtime_info.vs_info;
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
|
||||
ctx.AddExecutionMode(main,
|
||||
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
|
||||
? spv::ExecutionMode::VertexOrderCcw
|
||||
: spv::ExecutionMode::VertexOrderCw);
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
execution_model = spv::ExecutionModel::Fragment;
|
||||
if (ctx.profile.lower_left_origin_mode) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
|
||||
@ -288,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
case LogicalStage::Geometry:
|
||||
execution_model = spv::ExecutionModel::Geometry;
|
||||
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
|
||||
ctx.AddExecutionMode(main,
|
||||
@ -299,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
ctx.runtime_info.gs_info.num_invocations);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||
UNREACHABLE_MSG("Stage {}", u32(info.stage));
|
||||
}
|
||||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||
}
|
||||
@ -345,7 +397,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
|
||||
const IR::Program& program, Bindings& binding) {
|
||||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||
const Id main{DefineMain(ctx, program)};
|
||||
DefineEntryPoint(program, ctx, main);
|
||||
DefineEntryPoint(program.info, ctx, main);
|
||||
SetupCapabilities(program.info, profile, ctx);
|
||||
SetupFloatMode(ctx, profile, runtime_info, main);
|
||||
PatchPhiNodes(program, ctx);
|
||||
|
@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
|
||||
|
||||
void EmitBarrier(EmitContext& ctx) {
|
||||
const auto execution{spv::Scope::Workgroup};
|
||||
const auto memory{spv::Scope::Workgroup};
|
||||
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
|
||||
spv::MemorySemanticsMask::WorkgroupMemory};
|
||||
spv::Scope memory;
|
||||
spv::MemorySemanticsMask memory_semantics;
|
||||
if (ctx.l_stage == Shader::LogicalStage::TessellationControl) {
|
||||
memory = spv::Scope::Invocation;
|
||||
memory_semantics = spv::MemorySemanticsMask::MaskNone;
|
||||
} else {
|
||||
memory = spv::Scope::Workgroup;
|
||||
memory_semantics =
|
||||
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory;
|
||||
}
|
||||
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
|
||||
ctx.ConstU32(static_cast<u32>(memory)),
|
||||
ctx.ConstU32(static_cast<u32>(memory_semantics)));
|
||||
|
@ -4,6 +4,9 @@
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
#include <magic_enum/magic_enum.hpp>
|
||||
|
||||
@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
||||
|
||||
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& info{ctx.output_params.at(index)};
|
||||
ASSERT(info.num_components > 0);
|
||||
if (info.num_components == 1) {
|
||||
return info.id;
|
||||
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
||||
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
|
||||
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
|
||||
ctx.ConstU32(element));
|
||||
} else {
|
||||
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||
const auto& info{ctx.output_params.at(attr_index)};
|
||||
ASSERT(info.num_components > 0);
|
||||
if (info.num_components == 1) {
|
||||
return info.id;
|
||||
} else {
|
||||
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (IR::IsMrt(attr)) {
|
||||
@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||
|
||||
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
||||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& info{ctx.output_params.at(index)};
|
||||
return {info.component_type, info.is_integer};
|
||||
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
||||
return {ctx.F32[1], false};
|
||||
} else {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& info{ctx.output_params.at(index)};
|
||||
return {info.component_type, info.is_integer};
|
||||
}
|
||||
}
|
||||
if (IR::IsMrt(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
||||
@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
||||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||
}
|
||||
|
||||
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||
if (IR::IsPosition(attr)) {
|
||||
ASSERT(attr == IR::Attribute::Position0);
|
||||
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
const auto pointer{
|
||||
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
|
||||
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
|
||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||
@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
|
||||
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto param = ctx.input_params.at(param_id).id;
|
||||
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
|
||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
|
||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||
@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||
if (ctx.info.stage == Stage::Geometry) {
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
||||
if (ctx.info.l_stage == LogicalStage::Geometry) {
|
||||
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
|
||||
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
|
||||
ctx.info.l_stage == LogicalStage::TessellationEval) {
|
||||
if (IR::IsTessCoord(attr)) {
|
||||
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
|
||||
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
const auto pointer{
|
||||
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
|
||||
return ctx.OpLoad(ctx.F32[1], pointer);
|
||||
} else if (IR::IsParam(attr)) {
|
||||
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto param = ctx.input_params.at(param_id).id;
|
||||
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
|
||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (IR::IsParam(attr)) {
|
||||
@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||
}
|
||||
return coord;
|
||||
}
|
||||
case IR::Attribute::TessellationEvaluationPointU:
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
|
||||
case IR::Attribute::TessellationEvaluationPointV:
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
|
||||
default:
|
||||
throw NotImplementedException("Read attribute {}", attr);
|
||||
UNREACHABLE_MSG("Read attribute {}", attr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
|
||||
ctx.u32_zero_value);
|
||||
case IR::Attribute::PrimitiveId:
|
||||
ASSERT(ctx.info.stage == Stage::Geometry);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
||||
case IR::Attribute::InvocationId:
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
|
||||
ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
|
||||
case IR::Attribute::PatchVertices:
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
|
||||
case IR::Attribute::PackedHullInvocationInfo: {
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||
// [0:8]: patch id within VGT
|
||||
// [8:12]: output control point id
|
||||
// But 0:8 should be treated as 0 for attribute addressing purposes
|
||||
if (ctx.runtime_info.hs_info.IsPassthrough()) {
|
||||
// Gcn shader would run with 1 thread, but we need to run a thread for
|
||||
// each output control point.
|
||||
// If Gcn shader uses this value, we should make sure all threads in the
|
||||
// Vulkan shader use 0
|
||||
return ctx.ConstU32(0u);
|
||||
} else {
|
||||
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
|
||||
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
|
||||
}
|
||||
}
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
UNREACHABLE_MSG("Read U32 attribute {}", attr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
||||
}
|
||||
}
|
||||
|
||||
// Reads one f32 component from the per-vertex tessellation input array.
// The access chain indexes ctx.input_attr_array by (vertex_index, attr_index, comp_index),
// all of which are SPIR-V ids (i.e. may be dynamic values), and the result is loaded as F32.
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
    const Id in_f32_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
    const Id component_addr = ctx.OpAccessChain(in_f32_ptr, ctx.input_attr_array, vertex_index,
                                                attr_index, comp_index);
    return ctx.OpLoad(ctx.F32[1], component_addr);
}
|
||||
|
||||
// Writes one f32 component into the per-vertex output array.
// No vertex index is passed in: the first array index is always the value loaded from
// ctx.invocation_id, followed by attr_index and comp_index.
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
    const Id out_f32_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
    // The invocation id selects which vertex slot of the output array is written.
    const Id vertex_slot = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
    const Id destination = ctx.OpAccessChain(out_f32_ptr, ctx.output_attr_array, vertex_slot,
                                             attr_index, comp_index);
    ctx.OpStore(destination, value);
}
|
||||
|
||||
// Loads one f32 component of a generic patch variable.
// The patch variable is looked up in ctx.patches by its generic index (bounds-checked via at()),
// and the component is selected by the generic element of `patch`. The pointer type is
// ctx.output_f32 when emitting a tessellation control shader and ctx.input_f32 otherwise.
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
    const u32 patch_slot = IR::GenericPatchIndex(patch);
    const Id component = ctx.ConstU32(IR::GenericPatchElement(patch));

    Id component_ptr_type;
    if (ctx.l_stage == LogicalStage::TessellationControl) {
        component_ptr_type = ctx.output_f32;
    } else {
        component_ptr_type = ctx.input_f32;
    }

    const Id address = ctx.OpAccessChain(component_ptr_type, ctx.patches.at(patch_slot), component);
    return ctx.OpLoad(ctx.F32[1], address);
}
|
||||
|
||||
// Stores `value` to the output selected by `patch`:
//  - generic patches go to the matching component of ctx.patches[generic index];
//  - TessellationLodLeft/Right/Top/Bottom go to ctx.output_tess_level_outer, indexed by the
//    patch's offset from TessellationLodLeft;
//  - TessellationLodInteriorU/V go to elements 0/1 of ctx.output_tess_level_inner.
// Any other patch value hits UNREACHABLE_MSG.
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
    Id destination{};
    if (IR::IsGeneric(patch)) {
        const u32 patch_slot = IR::GenericPatchIndex(patch);
        const Id component = ctx.ConstU32(IR::GenericPatchElement(patch));
        destination = ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(patch_slot), component);
    } else {
        switch (patch) {
        case IR::Patch::TessellationLodLeft:
        case IR::Patch::TessellationLodRight:
        case IR::Patch::TessellationLodTop:
        case IR::Patch::TessellationLodBottom: {
            const u32 outer_slot = static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft);
            const Id outer_slot_id = ctx.ConstU32(outer_slot);
            destination =
                ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, outer_slot_id);
            break;
        }
        case IR::Patch::TessellationLodInteriorU:
            destination = ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
                                            ctx.u32_zero_value);
            break;
        case IR::Patch::TessellationLodInteriorV:
            destination =
                ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
            break;
        default:
            UNREACHABLE_MSG("Patch {}", u32(patch));
        }
    }
    ctx.OpStore(destination, value);
}
|
||||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
|
@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels;
|
||||
}
|
||||
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
||||
Id lod, Id ms) {
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||
const IR::Value& offset, Id ms) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id result_type = texture.data_types->Get(4);
|
||||
@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
|
||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample;
|
||||
}
|
||||
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) {
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id color_type = texture.data_types->Get(4);
|
||||
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
|
||||
ImageOperands operands;
|
||||
if (ctx.profile.supports_image_load_store_lod) {
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
} else if (Sirit::ValidId(lod)) {
|
||||
LOG_WARNING(Render, "Image write with LOD not supported by driver");
|
||||
}
|
||||
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -9,6 +9,7 @@
|
||||
namespace Shader::IR {
|
||||
enum class Attribute : u64;
|
||||
enum class ScalarReg : u32;
|
||||
enum class Patch : u64;
|
||||
class Inst;
|
||||
class Value;
|
||||
} // namespace Shader::IR
|
||||
@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
|
||||
void EmitReference(EmitContext&);
|
||||
void EmitPhiMove(EmitContext&);
|
||||
void EmitJoin(EmitContext& ctx);
|
||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
||||
void EmitGetScc(EmitContext& ctx);
|
||||
void EmitGetExec(EmitContext& ctx);
|
||||
void EmitGetVcc(EmitContext& ctx);
|
||||
@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
|
||||
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
|
||||
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
|
||||
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
|
||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
||||
void EmitSetSampleMask(EmitContext& ctx, Id value);
|
||||
void EmitSetFragDepth(EmitContext& ctx, Id value);
|
||||
@ -392,14 +395,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset);
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset, Id dref);
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
||||
Id lod, Id ms);
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||
const IR::Value& offset, Id ms);
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
|
||||
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod);
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color);
|
||||
|
||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) {
|
||||
case Stage::Compute:
|
||||
return "cs";
|
||||
}
|
||||
throw InvalidArgument("Invalid stage {}", u32(stage));
|
||||
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
|
||||
}
|
||||
|
||||
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
|
||||
@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
||||
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||
const Info& info_, Bindings& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
||||
profile{profile_}, stage{info.stage}, binding{binding_} {
|
||||
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces();
|
||||
@ -268,9 +269,8 @@ void EmitContext::DefineInputs() {
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
switch (l_stage) {
|
||||
case LogicalStage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
@ -294,7 +294,7 @@ void EmitContext::DefineInputs() {
|
||||
});
|
||||
// Note that we pass index rather than Id
|
||||
input_params[attrib.semantic] = SpirvAttribute{
|
||||
.id = rate_idx,
|
||||
.id = {rate_idx},
|
||||
.pointer_type = input_u32,
|
||||
.component_type = U32[1],
|
||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
@ -311,12 +311,11 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
input_params[attrib.semantic] =
|
||||
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::Fragment:
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
@ -351,15 +350,14 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
input_params[semantic] =
|
||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
||||
interfaces.push_back(attr_id);
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
case LogicalStage::Compute:
|
||||
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
case LogicalStage::Geometry: {
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
const auto gl_per_vertex =
|
||||
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
||||
@ -383,9 +381,50 @@ void EmitContext::DefineInputs() {
|
||||
for (int param_id = 0; param_id < num_params; ++param_id) {
|
||||
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
||||
const Id id{DefineInput(type, param_id)};
|
||||
Name(id, fmt::format("in_attr{}", param_id));
|
||||
Name(id, fmt::format("gs_in_attr{}", param_id));
|
||||
input_params[param_id] = {id, input_f32, F32[1], 4};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationControl: {
|
||||
invocation_id =
|
||||
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
|
||||
patch_vertices =
|
||||
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
|
||||
const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||
input_attr_array = DefineInput(patch_array_type, 0);
|
||||
Name(input_attr_array, "in_attrs");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationEval: {
|
||||
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
|
||||
const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||
input_attr_array = DefineInput(patch_array_type, 0);
|
||||
Name(input_attr_array, "in_attrs");
|
||||
}
|
||||
|
||||
u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
|
||||
for (size_t index = 0; index < 30; ++index) {
|
||||
if (!(info.uses_patches & (1U << index))) {
|
||||
continue;
|
||||
}
|
||||
const Id id{DefineInput(F32[4], patch_base_location + index)};
|
||||
Decorate(id, spv::Decoration::Patch);
|
||||
Name(id, fmt::format("patch_in{}", index));
|
||||
patches[index] = id;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -395,9 +434,81 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
|
||||
void EmitContext::DefineOutputs() {
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
switch (l_stage) {
|
||||
case LogicalStage::Vertex: {
|
||||
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
|
||||
// Might cause problems linking with tcs
|
||||
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
info.stores.Get(IR::Attribute::Position2) ||
|
||||
info.stores.Get(IR::Attribute::Position3);
|
||||
if (has_extra_pos_stores) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
clip_distances =
|
||||
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
||||
cull_distances =
|
||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
||||
}
|
||||
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
|
||||
const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
output_attr_array = DefineOutput(type, 0);
|
||||
Name(output_attr_array, "out_attrs");
|
||||
}
|
||||
} else {
|
||||
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||
if (!info.stores.GetAny(param)) {
|
||||
continue;
|
||||
}
|
||||
const u32 num_components = info.stores.NumComponents(param);
|
||||
const Id id{DefineOutput(F32[num_components], i)};
|
||||
Name(id, fmt::format("out_attr{}", i));
|
||||
output_params[i] =
|
||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationControl: {
|
||||
if (info.stores_tess_level_outer) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(4U))};
|
||||
output_tess_level_outer =
|
||||
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
|
||||
Decorate(output_tess_level_outer, spv::Decoration::Patch);
|
||||
}
|
||||
if (info.stores_tess_level_inner) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(2U))};
|
||||
output_tess_level_inner =
|
||||
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
|
||||
Decorate(output_tess_level_inner, spv::Decoration::Patch);
|
||||
}
|
||||
|
||||
const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// Unlike the input patch array, the output array is sized by the number of output control points
|
||||
const Id patch_array_type{TypeArray(
|
||||
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
|
||||
output_attr_array = DefineOutput(patch_array_type, 0);
|
||||
Name(output_attr_array, "out_attrs");
|
||||
}
|
||||
|
||||
u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
|
||||
for (size_t index = 0; index < 30; ++index) {
|
||||
if (!(info.uses_patches & (1U << index))) {
|
||||
continue;
|
||||
}
|
||||
const Id id{DefineOutput(F32[4], patch_base_location + index)};
|
||||
Decorate(id, spv::Decoration::Patch);
|
||||
Name(id, fmt::format("patch_out{}", index));
|
||||
patches[index] = id;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationEval: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
info.stores.Get(IR::Attribute::Position2) ||
|
||||
@ -419,11 +530,10 @@ void EmitContext::DefineOutputs() {
|
||||
Name(id, fmt::format("out_attr{}", i));
|
||||
output_params[i] =
|
||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::Fragment:
|
||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||
if (!info.stores.GetAny(mrt)) {
|
||||
@ -435,22 +545,22 @@ void EmitContext::DefineOutputs() {
|
||||
const Id id{DefineOutput(type, i)};
|
||||
Name(id, fmt::format("frag_color{}", i));
|
||||
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
case LogicalStage::Geometry: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
|
||||
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||
const Id id{DefineOutput(F32[4], attr_id)};
|
||||
Name(id, fmt::format("out_attr{}", attr_id));
|
||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
case LogicalStage::Compute:
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
@ -586,6 +696,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||
return spv::ImageFormat::R32ui;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Sint) {
|
||||
return spv::ImageFormat::R32i;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::R32f;
|
||||
|
@ -46,14 +46,18 @@ public:
|
||||
void DefineBufferOffsets();
|
||||
void DefineInterpolatedAttribs();
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
Decorate(input_id, spv::Decoration::Location, location);
|
||||
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
||||
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
|
||||
if (location) {
|
||||
Decorate(input_id, spv::Decoration::Location, *location);
|
||||
}
|
||||
return input_id;
|
||||
}
|
||||
|
||||
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
|
||||
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
|
||||
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
|
||||
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
|
||||
if (location) {
|
||||
Decorate(output_id, spv::Decoration::Location, *location);
|
||||
}
|
||||
@ -131,7 +135,8 @@ public:
|
||||
const Info& info;
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
Stage stage{};
|
||||
Stage stage;
|
||||
LogicalStage l_stage{};
|
||||
|
||||
Id void_id{};
|
||||
Id U8{};
|
||||
@ -188,8 +193,15 @@ public:
|
||||
Id clip_distances{};
|
||||
Id cull_distances{};
|
||||
|
||||
Id patch_vertices{};
|
||||
Id output_tess_level_outer{};
|
||||
Id output_tess_level_inner{};
|
||||
Id tess_coord;
|
||||
std::array<Id, 30> patches{};
|
||||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
@ -252,6 +264,8 @@ public:
|
||||
bool is_loaded{};
|
||||
s32 buffer_handle{-1};
|
||||
};
|
||||
Id input_attr_array;
|
||||
Id output_attr_array;
|
||||
std::array<SpirvAttribute, IR::NumParams> input_params{};
|
||||
std::array<SpirvAttribute, IR::NumParams> output_params{};
|
||||
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
|
||||
|
@ -80,6 +80,8 @@ void CFG::EmitLabels() {
|
||||
if (inst.IsUnconditionalBranch()) {
|
||||
const u32 target = inst.BranchTarget(pc);
|
||||
AddLabel(target);
|
||||
// Emit this label so that the block ends with s_branch instruction
|
||||
AddLabel(pc + inst.length);
|
||||
} else if (inst.IsConditionalBranch()) {
|
||||
const u32 true_label = inst.BranchTarget(pc);
|
||||
const u32 false_label = pc + inst.length;
|
||||
|
38
src/shader_recompiler/frontend/tessellation.h
Normal file
38
src/shader_recompiler/frontend/tessellation.h
Normal file
@ -0,0 +1,38 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
struct TessellationDataConstantBuffer {
|
||||
u32 ls_stride;
|
||||
u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride :
|
||||
// ls_stride
|
||||
u32 num_patches; // num patches submitted in threadgroup
|
||||
u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ?
|
||||
// HullStateConstants::m_numInputCP * ls_stride * num_patches : 0
|
||||
// basically 0 when passthrough
|
||||
u32 patch_const_size; // 16 * num_patch_attrs
|
||||
u32 patch_const_base; // hs_output_base + patch_output_size
|
||||
u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch
|
||||
f32 off_chip_tessellation_factor_threshold;
|
||||
u32 first_edge_tess_factor_index;
|
||||
};
|
||||
|
||||
// Assign names to dword fields of TessellationDataConstantBuffer
|
||||
enum class TessConstantAttribute : u32 {
|
||||
LsStride,
|
||||
HsCpStride,
|
||||
HsNumPatch,
|
||||
HsOutputBase,
|
||||
PatchConstSize,
|
||||
PatchConstBase,
|
||||
PatchOutputSize,
|
||||
OffChipTessellationFactorThreshold,
|
||||
FirstEdgeTessFactorIndex,
|
||||
};
|
||||
|
||||
} // namespace Shader
|
@ -1,8 +1,8 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||
const IR::U32 value{GetSrc(inst.src[0])};
|
||||
|
||||
if (info.stage != Stage::Compute) {
|
||||
SetDst(inst.dst[0], value);
|
||||
} else {
|
||||
if (info.l_stage == LogicalStage::Compute ||
|
||||
info.l_stage == LogicalStage::TessellationControl) {
|
||||
SetDst(inst.dst[0], ir.ReadFirstLane(value));
|
||||
} else {
|
||||
SetDst(inst.dst[0], value);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ void Translator::EmitExport(const GcnInst& inst) {
|
||||
|
||||
const auto& exp = inst.control.exp;
|
||||
const IR::Attribute attrib{exp.target};
|
||||
if (attrib == IR::Attribute::Depth && exp.en != 1) {
|
||||
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
|
||||
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
|
||||
return;
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <bit>
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
return S_BFM_B32(inst);
|
||||
case Opcode::S_MUL_I32:
|
||||
return S_MUL_I32(inst);
|
||||
case Opcode::S_BFE_I32:
|
||||
return S_BFE(inst, true);
|
||||
case Opcode::S_BFE_U32:
|
||||
return S_BFE_U32(inst);
|
||||
return S_BFE(inst, false);
|
||||
case Opcode::S_ABSDIFF_I32:
|
||||
return S_ABSDIFF_I32(inst);
|
||||
|
||||
@ -94,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
break;
|
||||
case Opcode::S_BREV_B32:
|
||||
return S_BREV_B32(inst);
|
||||
case Opcode::S_BCNT1_I32_B64:
|
||||
return S_BCNT1_I32_B64(inst);
|
||||
case Opcode::S_BCNT1_I32_B32:
|
||||
return S_BCNT1_I32_B32(inst);
|
||||
case Opcode::S_FF1_I32_B32:
|
||||
return S_FF1_I32_B32(inst);
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
@ -157,8 +161,9 @@ void Translator::EmitSOPK(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
// SOPK
|
||||
case Opcode::S_MOVK_I32:
|
||||
return S_MOVK(inst);
|
||||
|
||||
return S_MOVK(inst, false);
|
||||
case Opcode::S_CMOVK_I32:
|
||||
return S_MOVK(inst, true);
|
||||
case Opcode::S_CMPK_EQ_I32:
|
||||
return S_CMPK(ConditionOp::EQ, true, inst);
|
||||
case Opcode::S_CMPK_LG_I32:
|
||||
@ -434,12 +439,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
|
||||
}
|
||||
|
||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||
void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
|
||||
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
|
||||
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
|
||||
const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
@ -454,13 +459,16 @@ void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
|
||||
|
||||
// SOPK
|
||||
|
||||
void Translator::S_MOVK(const GcnInst& inst) {
|
||||
const auto simm16 = inst.control.sopk.simm;
|
||||
if (simm16 & (1 << 15)) {
|
||||
// TODO: need to verify the case of imm sign extension
|
||||
UNREACHABLE();
|
||||
void Translator::S_MOVK(const GcnInst& inst, bool is_conditional) {
|
||||
const s16 simm16 = inst.control.sopk.simm;
|
||||
// do the sign extension
|
||||
const s32 simm32 = static_cast<s32>(simm16);
|
||||
IR::U32 val = ir.Imm32(simm32);
|
||||
if (is_conditional) {
|
||||
// if !SCC it's a NOP
|
||||
val = IR::U32{ir.Select(ir.GetScc(), val, GetSrc(inst.dst[0]))};
|
||||
}
|
||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||
SetDst(inst.dst[0], val);
|
||||
}
|
||||
|
||||
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
||||
@ -571,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
|
||||
void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
|
||||
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
@ -594,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
@ -34,9 +36,8 @@ void Translator::EmitPrologue() {
|
||||
}
|
||||
|
||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Export:
|
||||
switch (info.l_stage) {
|
||||
case LogicalStage::Vertex:
|
||||
// v0: vertex ID, always present
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||
// v1: instance ID, step rate 0
|
||||
@ -52,7 +53,7 @@ void Translator::EmitPrologue() {
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||
}
|
||||
break;
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::Fragment:
|
||||
dst_vreg = IR::VectorReg::V0;
|
||||
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
||||
++dst_vreg; // I
|
||||
@ -122,7 +123,30 @@ void Translator::EmitPrologue() {
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
case LogicalStage::TessellationControl: {
|
||||
// Should be laid out like:
|
||||
// [0:8]: patch id within VGT
|
||||
// [8:12]: output control point id
|
||||
ir.SetVectorReg(IR::VectorReg::V1,
|
||||
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
|
||||
// TODO PrimitiveId is probably V2 but haven't seen it yet
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationEval:
|
||||
ir.SetVectorReg(IR::VectorReg::V0,
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
|
||||
ir.SetVectorReg(IR::VectorReg::V1,
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
|
||||
// V2 is similar to PrimitiveID but not the same. It seems to only be used in
|
||||
// compiler-generated address calculations. It's probably the patch id within the
|
||||
// patches running locally on a given VGT (or CU, whichever is the granularity of LDS
|
||||
// memory)
|
||||
// Set to 0. See explanation in comment describing hull/domain passes
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
|
||||
// V3 is the actual PrimitiveID as intended by the shader author.
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
case LogicalStage::Compute:
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||
@ -137,7 +161,7 @@ void Translator::EmitPrologue() {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
case LogicalStage::Geometry:
|
||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||
@ -152,7 +176,7 @@ void Translator::EmitPrologue() {
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
UNREACHABLE_MSG("Unknown shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
@ -415,7 +439,8 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
|
||||
ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
|
||||
return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
|
||||
case OperandField::VccLo:
|
||||
UNREACHABLE();
|
||||
ir.SetVccLo(lo);
|
||||
return ir.SetVccHi(hi);
|
||||
case OperandField::VccHi:
|
||||
UNREACHABLE();
|
||||
case OperandField::M0:
|
||||
@ -503,7 +528,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
|
||||
|
||||
// Special case for emitting fetch shader.
|
||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
|
||||
info.stage == Stage::Local);
|
||||
translator.EmitFetch(inst);
|
||||
continue;
|
||||
}
|
||||
|
@ -94,12 +94,13 @@ public:
|
||||
void S_ASHR_I32(const GcnInst& inst);
|
||||
void S_BFM_B32(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
void S_BFE(const GcnInst& inst, bool is_signed);
|
||||
void S_BFE_I32(const GcnInst& inst);
|
||||
void S_ABSDIFF_I32(const GcnInst& inst);
|
||||
void S_NOT_B32(const GcnInst& inst);
|
||||
|
||||
// SOPK
|
||||
void S_MOVK(const GcnInst& inst);
|
||||
void S_MOVK(const GcnInst& inst, bool is_conditional);
|
||||
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_ADDK_I32(const GcnInst& inst);
|
||||
void S_MULK_I32(const GcnInst& inst);
|
||||
@ -109,7 +110,7 @@ public:
|
||||
void S_MOV_B64(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_BCNT1_I32_B64(const GcnInst& inst);
|
||||
void S_BCNT1_I32_B32(const GcnInst& inst);
|
||||
void S_FF1_I32_B32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||
@ -217,7 +218,7 @@ public:
|
||||
|
||||
// VOP3a
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
|
||||
void V_MAD_U32_U24(const GcnInst& inst);
|
||||
void V_CUBEID_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
@ -276,7 +277,7 @@ public:
|
||||
// Image Memory
|
||||
// MIMG
|
||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||
void IMAGE_STORE(const GcnInst& inst);
|
||||
void IMAGE_STORE(bool has_mip, const GcnInst& inst);
|
||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||
|
@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
|
||||
|
||||
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
||||
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
|
||||
IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U32 src2{GetSrc(inst.src[2])};
|
||||
if (!src1.IsImmediate()) {
|
||||
src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
|
||||
}
|
||||
if (!src2.IsImmediate()) {
|
||||
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
|
||||
}
|
||||
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
|
||||
}
|
||||
|
||||
|
@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
||||
|
||||
// Buffer store operations
|
||||
case Opcode::IMAGE_STORE:
|
||||
return IMAGE_STORE(inst);
|
||||
return IMAGE_STORE(false, inst);
|
||||
case Opcode::IMAGE_STORE_MIP:
|
||||
return IMAGE_STORE(true, inst);
|
||||
|
||||
// Image misc operations
|
||||
case Opcode::IMAGE_GET_RESINFO:
|
||||
@ -187,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
@ -245,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
if (info.stage == Stage::Hull) {
|
||||
// printf("here\n"); // break
|
||||
}
|
||||
|
||||
IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
@ -267,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
@ -423,7 +431,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_STORE(const GcnInst& inst) {
|
||||
void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg data_reg{inst.dst[0].code};
|
||||
@ -434,6 +442,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
|
||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.has_lod.Assign(has_mip);
|
||||
|
||||
boost::container::static_vector<IR::F32, 4> comps;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (((mimg.dmask >> i) & 1) == 0) {
|
||||
@ -443,7 +454,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
|
||||
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
|
||||
}
|
||||
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
|
||||
ir.ImageWrite(handle, body, value, {});
|
||||
ir.ImageWrite(handle, body, {}, value, info);
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/backend/bindings.h"
|
||||
#include "shader_recompiler/frontend/copy_shader.h"
|
||||
#include "shader_recompiler/frontend/tessellation.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
@ -163,6 +164,7 @@ struct Info {
|
||||
UserDataMask ud_mask{};
|
||||
|
||||
CopyShaderData gs_copy_data;
|
||||
u32 uses_patches{};
|
||||
|
||||
BufferResourceList buffers;
|
||||
TextureBufferResourceList texture_buffers;
|
||||
@ -173,8 +175,12 @@ struct Info {
|
||||
PersistentSrtInfo srt_info;
|
||||
std::vector<u32> flattened_ud_buf;
|
||||
|
||||
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
||||
s32 tess_consts_dword_offset = -1;
|
||||
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
LogicalStage l_stage;
|
||||
|
||||
u64 pgm_hash{};
|
||||
VAddr pgm_base;
|
||||
@ -190,14 +196,16 @@ struct Info {
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
bool stores_tess_level_outer{};
|
||||
bool stores_tess_level_inner{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
bool has_readconst{};
|
||||
u8 mrt_mask{0u};
|
||||
bool has_fetch_shader{false};
|
||||
u32 fetch_shader_sgpr_base{0u};
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
|
||||
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
user_data{params.user_data} {}
|
||||
|
||||
template <typename T>
|
||||
@ -244,6 +252,16 @@ struct Info {
|
||||
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
|
||||
}
|
||||
}
|
||||
|
||||
void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
|
||||
ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD
|
||||
auto buf = ReadUdReg<AmdGpu::Buffer>(static_cast<u32>(tess_consts_ptr_base),
|
||||
static_cast<u32>(tess_consts_dword_offset));
|
||||
VAddr tess_constants_addr = buf.base_address;
|
||||
memcpy(&tess_constants,
|
||||
reinterpret_cast<TessellationDataConstantBuffer*>(tess_constants_addr),
|
||||
sizeof(tess_constants));
|
||||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||
|
@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
|
||||
return "VertexId";
|
||||
case Attribute::InstanceId:
|
||||
return "InstanceId";
|
||||
case Attribute::PrimitiveId:
|
||||
return "PrimitiveId";
|
||||
case Attribute::FragCoord:
|
||||
return "FragCoord";
|
||||
case Attribute::IsFrontFace:
|
||||
@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) {
|
||||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
case Attribute::InvocationId:
|
||||
return "InvocationId";
|
||||
case Attribute::PatchVertices:
|
||||
return "PatchVertices";
|
||||
case Attribute::TessellationEvaluationPointU:
|
||||
return "TessellationEvaluationPointU";
|
||||
case Attribute::TessellationEvaluationPointV:
|
||||
return "TessellationEvaluationPointV";
|
||||
case Attribute::PackedHullInvocationInfo:
|
||||
return "PackedHullInvocationInfo";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -72,8 +72,13 @@ enum class Attribute : u64 {
|
||||
LocalInvocationId = 75,
|
||||
LocalInvocationIndex = 76,
|
||||
FragCoord = 77,
|
||||
InstanceId0 = 78, // step rate 0
|
||||
InstanceId1 = 79, // step rate 1
|
||||
InstanceId0 = 78, // step rate 0
|
||||
InstanceId1 = 79, // step rate 1
|
||||
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
|
||||
PatchVertices = 81,
|
||||
TessellationEvaluationPointU = 82,
|
||||
TessellationEvaluationPointV = 83,
|
||||
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
|
||||
Max,
|
||||
};
|
||||
|
||||
@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
|
||||
}
|
||||
|
||||
constexpr bool IsTessCoord(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::TessellationEvaluationPointU &&
|
||||
attribute <= Attribute::TessellationEvaluationPointV;
|
||||
}
|
||||
|
||||
constexpr bool IsParam(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||
}
|
||||
|
@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
|
||||
return fmt::format("{}", arg.VectorReg());
|
||||
case Type::Attribute:
|
||||
return fmt::format("{}", arg.Attribute());
|
||||
case Type::Patch:
|
||||
return fmt::format("{}", arg.Patch());
|
||||
default:
|
||||
return "<unknown immediate type>";
|
||||
}
|
||||
|
@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) {
|
||||
Inst(Opcode::SetM0, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||
@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
||||
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
||||
}
|
||||
|
||||
F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
|
||||
const U32& comp_index) {
|
||||
return Inst<F32>(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index);
|
||||
}
|
||||
|
||||
void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
|
||||
const U32& comp_index) {
|
||||
Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetPatch(Patch patch) {
|
||||
return Inst<F32>(Opcode::GetPatch, patch);
|
||||
}
|
||||
|
||||
void IREmitter::SetPatch(Patch patch, const F32& value) {
|
||||
Inst(Opcode::SetPatch, patch, value);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 32:
|
||||
@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
|
||||
switch (elements.size()) {
|
||||
case 2:
|
||||
return CompositeConstruct(elements[0], elements[1]);
|
||||
case 3:
|
||||
return CompositeConstruct(elements[0], elements[1], elements[2]);
|
||||
case 4:
|
||||
return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
|
||||
default:
|
||||
UNREACHABLE_MSG("Composite construct with greater than 4 elements");
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
||||
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
||||
if (element >= limit) {
|
||||
@ -1599,9 +1630,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
|
||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
const U32& lod, const U32& multisampling, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling);
|
||||
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod,
|
||||
const Value& offset, const U32& multisampling, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, lod, offset, multisampling);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
@ -1625,13 +1656,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords,
|
||||
offset, lod_clamp);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageRead, Flags{info}, handle, coords);
|
||||
Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageRead, Flags{info}, handle, coords, lod);
|
||||
}
|
||||
|
||||
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
TextureInstInfo info) {
|
||||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
|
||||
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod,
|
||||
const Value& color, TextureInstInfo info) {
|
||||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color);
|
||||
}
|
||||
|
||||
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/condition.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
@ -80,10 +81,18 @@ public:
|
||||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
|
||||
IR::Value index = IR::Value(u32(0u)));
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
[[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
|
||||
const U32& comp_index);
|
||||
void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);
|
||||
|
||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||
void SetPatch(Patch patch, const F32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
@ -138,6 +147,8 @@ public:
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
||||
const Value& e4);
|
||||
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
|
||||
|
||||
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
||||
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
||||
|
||||
@ -314,14 +325,16 @@ public:
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
|
||||
const Value& offset, const F32& dref, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod,
|
||||
const Value& offset, const U32& multisampling,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||
const Value& derivatives_dx, const Value& derivatives_dy,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
|
||||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod,
|
||||
TextureInstInfo info);
|
||||
void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color,
|
||||
TextureInstInfo info);
|
||||
|
||||
void EmitVertex();
|
||||
@ -333,6 +346,7 @@ private:
|
||||
template <typename T = Value, typename... Args>
|
||||
T Inst(Opcode op, Args... args) {
|
||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
|
||||
it->SetParent(block);
|
||||
return T{Value{&*it}};
|
||||
}
|
||||
|
||||
@ -350,6 +364,7 @@ private:
|
||||
u32 raw_flags{};
|
||||
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
|
||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
|
||||
it->SetParent(block);
|
||||
return T{Value{&*it}};
|
||||
}
|
||||
};
|
||||
|
@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::Discard:
|
||||
case Opcode::DiscardCond:
|
||||
case Opcode::SetAttribute:
|
||||
case Opcode::SetTcsGenericAttribute:
|
||||
case Opcode::SetPatch:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::StoreBufferU32x2:
|
||||
case Opcode::StoreBufferU32x3:
|
||||
|
@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
|
||||
constexpr Type ScalarReg{Type::ScalarReg};
|
||||
constexpr Type VectorReg{Type::VectorReg};
|
||||
constexpr Type Attribute{Type::Attribute};
|
||||
constexpr Type SystemValue{Type::SystemValue};
|
||||
constexpr Type Patch{Type::Patch};
|
||||
constexpr Type U1{Type::U1};
|
||||
constexpr Type U8{Type::U8};
|
||||
constexpr Type U16{Type::U16};
|
||||
|
@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32,
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
OPCODE(GetPatch, F32, Patch, )
|
||||
OPCODE(SetPatch, Void, Patch, F32, )
|
||||
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
|
||||
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )
|
||||
|
||||
// Flags
|
||||
OPCODE(GetScc, U1, Void, )
|
||||
@ -334,12 +338,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, )
|
||||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, )
|
||||
|
||||
// Image atomic operations
|
||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
|
@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void FoldMul(IR::Block& block, IR::Inst& inst) {
|
||||
if (!FoldCommutative<T>(inst, [](T a, T b) { return a * b; })) {
|
||||
return;
|
||||
}
|
||||
const IR::Value rhs{inst.Arg(1)};
|
||||
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
|
||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||
@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
||||
return;
|
||||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
FoldMul<u32>(block, inst);
|
||||
return;
|
||||
case IR::Opcode::UDiv32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||
ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0");
|
||||
return a / b;
|
||||
});
|
||||
return;
|
||||
case IR::Opcode::UMod32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||
ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0");
|
||||
return a % b;
|
||||
});
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(block, inst);
|
||||
|
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
@ -0,0 +1,4 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
746
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
746
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
@ -0,0 +1,746 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/pattern_matching.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
/**
|
||||
* Tessellation shaders pass outputs to the next shader using LDS.
|
||||
* The Hull shader stage receives input control points stored in LDS.
|
||||
*
|
||||
* These passes attempt to resolve LDS accesses to attribute accesses and correctly
|
||||
* write to the tessellation factor tables.
|
||||
*
|
||||
* The LDS layout is:
|
||||
* - TCS inputs for patch 0
|
||||
* - TCS inputs for patch 1
|
||||
* - TCS inputs for patch 2
|
||||
* - ...
|
||||
* - TCS outputs for patch 0
|
||||
* - TCS outputs for patch 1
|
||||
* - TCS outputs for patch 2
|
||||
* - ...
|
||||
* - PatchConst TCS outputs for patch 0
|
||||
* - PatchConst TCS outputs for patch 1
|
||||
* - PatchConst TCS outputs for patch 2
|
||||
*
|
||||
*
|
||||
* If the Hull stage does not write any new control points the driver will
|
||||
* optimize LDS layout so input and output control point spaces overlap.
|
||||
* (Passthrough)
|
||||
*
|
||||
* The gnm driver requires a V# holding special constants to be bound
|
||||
* for reads by the shader.
|
||||
* The Hull and Domain shaders read values from this buffer which
|
||||
* contain size and offset information required to address input, output,
|
||||
* or PatchConst attributes within the current patch.
|
||||
* See the TessellationDataConstantBuffer struct to see the layout of this V#.
|
||||
*
|
||||
* Tessellation factors are stored to a special tessellation factor V# that is automatically bound
|
||||
* by the driver. This is the input to the fixed function tessellator that actually subdivides the
|
||||
* domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull
|
||||
* shader.
|
||||
* The offset into the tess factor buffer determines which factor the shader is writing.
|
||||
* Additionally, most hull shaders seem to redundantly write tess factors to PatchConst
|
||||
* attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes.
|
||||
*
|
||||
* LDS reads in the Hull shader can be from input control points, and in the Domain shader can
|
||||
* be hs output control points (output from the perspective of the Hull shader) and patchconst
|
||||
* values.
|
||||
* LDS stores in the Hull shader can either be output control point writes or per-patch
|
||||
* (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless it's
|
||||
* followed by the geometry stage (but we haven't seen this yet), so nothing special there.
|
||||
* The address calculations can vary significantly and can't be easily pattern matched. We are at
|
||||
* the mercy of instruction selection the ps4 compiler wanted to use.
|
||||
* Generally though, they could look something like this:
|
||||
* Input control point:
|
||||
* addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride
|
||||
* + attr# * 16 + component
|
||||
* Output control point:
|
||||
* addr = #patches * input_cp_stride * #input_cp_per_patch
|
||||
* + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride
|
||||
* + attr# * 16 + component
|
||||
* Per patch output:
|
||||
* addr = #patches * input_cp_stride * #cp_per_input_patch
|
||||
* + #patches * output_patch_stride
|
||||
* + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component
|
||||
*
|
||||
* output_patch_stride and output_cp_stride are usually compile time constants in the gcn
|
||||
*
|
||||
* Hull shaders can probably also read output control points corresponding to other threads, like
|
||||
* shared memory (but we haven't seen this yet).
|
||||
* ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
|
||||
* They should also be able to read PatchConst values,
|
||||
* although not sure if this happens in practice.
|
||||
*
|
||||
* To determine which type of attribute (input, output, patchconst) is accessed, we check the users of
|
||||
* TessConstants V# reads to deduce which type of attribute a given load/store to LDS
|
||||
* is touching.
|
||||
*
|
||||
* In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control
|
||||
* point id (InvocationId) are packed in VGPR1 by the driver like
|
||||
* V1 = InvocationId << 8 | PatchIdInVgt
|
||||
* The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine
|
||||
* which is which.
|
||||
*
|
||||
* This pass does not attempt to deduce the exact attribute referenced in a LDS load/store.
|
||||
* Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's,
|
||||
* TES in's, and PatchConst in/out's.
|
||||
*
|
||||
* TCS/TES Input attributes:
|
||||
* We define input attributes using an array in the shader roughly like this:
|
||||
* // equivalent GLSL in TCS
|
||||
* layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
|
||||
*
|
||||
* Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
|
||||
* We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
|
||||
* For TES, the number of attributes comes from hs_cp_stride / 16.
|
||||
* The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
|
||||
* patch (for Hull) or the hs output patch (for Domain).
|
||||
*
|
||||
* For input reads in TCS or TES, we emit SPIR-V like:
|
||||
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
|
||||
*
|
||||
* For output writes, we assume the control point index is InvocationId, since high level languages
|
||||
* impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
|
||||
* like this:
|
||||
* layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES];
|
||||
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
|
||||
*
|
||||
* NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs
|
||||
* variable.
|
||||
*
|
||||
* Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole
|
||||
* which contains the attributes from many patches. On the other hand, higher level shading
|
||||
* languages restrict attribute access to the patch of the current thread, which is naturally a
|
||||
* restriction in SPIR-V also.
|
||||
* The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many
|
||||
* patches' attributes are just implementation details of the ps4 driver/compiler. To deal with
|
||||
* this, we can replace certain TessConstant V# reads with 0, which only contribute to the base
|
||||
* address of the current patch's attributes in LDS and not the indexes within the local patch.
|
||||
*
|
||||
* (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing
|
||||
* attributes to buffers and not caring about whether they are hs input, hs output, or patchconst
|
||||
* attributes)
|
||||
*
|
||||
*/
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Shader::Optimiation::PatternMatching;
|
||||
|
||||
// Records where the TessConstants V# lives (pointer base register + dword offset) in `info`,
// reads the constant buffer contents into `tess_constants`, and forwards them to the
// stage-specific runtime info: hs_info for the tessellation control stage, vs_info otherwise.
static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
                              Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                              TessellationDataConstantBuffer& tess_constants) {
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);

    const bool is_hull_stage = info.l_stage == LogicalStage::TessellationControl;
    if (!is_hull_stage) {
        runtime_info.vs_info.InitFromTessConstants(tess_constants);
        return;
    }
    runtime_info.hs_info.InitFromTessConstants(tess_constants);
}
|
||||
|
||||
struct TessSharpLocation {
|
||||
IR::ScalarReg ptr_base;
|
||||
u32 dword_off;
|
||||
};
|
||||
|
||||
// Tries to deduce where the V# used by `read_const_buffer` comes from by pattern matching the
// instruction's handle operand (argument 0).
//
// Two shapes are recognised:
//  1. The first dword of the handle is a GetUserData of an immediate register: the V# sits
//     directly in user data. ptr_base is reported as IR::ScalarReg::Max and dword_off is the
//     user-data register index.
//  2. The first dword of the handle is a ReadConst through a pointer whose low dword is a
//     GetUserData of an immediate register, at an immediate offset: ptr_base is that register
//     and dword_off is the ReadConst offset.
//
// Returns std::nullopt when the handle matches neither shape.
std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
    IR::Value sharp_ptr_base;
    IR::Value sharp_dword_offset;

    IR::Value handle = read_const_buffer->Arg(0);

    if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(),
                                  MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
                                 .dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
    }

    if (M_COMPOSITECONSTRUCTU32X4(
            M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)),
                                                  MatchIgnore()),
                        MatchImm(sharp_dword_offset)),
            MatchIgnore(), MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
                                 .dword_off = sharp_dword_offset.U32()};
    }

    return std::nullopt;
}
|
||||
|
||||
// Walker that helps deduce what type of attribute a DS instruction is reading
|
||||
// or writing, which could be an input control point, output control point,
|
||||
// or per-patch constant (PatchConst).
|
||||
// For certain ReadConstBuffer instructions using the tess constants V#, we visit the users
|
||||
// recursively and increment a counter on the Load/WriteShared users.
|
||||
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
|
||||
// and PatchConstBase (m_patchConstBase).
|
||||
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
|
||||
// is used as an addend to skip the region for input control points, and similarly
|
||||
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
|
||||
// for output control points.
|
||||
//
|
||||
// TODO: this will break if AMD compiler used distributive property like
|
||||
// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch)
|
||||
class TessConstantUseWalker {
|
||||
public:
|
||||
void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
|
||||
u32 inc;
|
||||
switch (attr) {
|
||||
case TessConstantAttribute::HsNumPatch:
|
||||
case TessConstantAttribute::HsOutputBase:
|
||||
inc = 1;
|
||||
break;
|
||||
case TessConstantAttribute::PatchConstBase:
|
||||
inc = 2;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
for (IR::Use use : read_const_buffer->Uses()) {
|
||||
MarkTessAttributeUsersHelper(use, inc);
|
||||
}
|
||||
|
||||
++seq_num;
|
||||
}
|
||||
|
||||
private:
|
||||
void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) {
|
||||
IR::Inst* inst = use.user;
|
||||
|
||||
switch (use.user->GetOpcode()) {
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::LoadSharedU128:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU128: {
|
||||
u32 counter = inst->Flags<u32>();
|
||||
inst->SetFlags<u32>(counter + inc);
|
||||
// Stop here
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::Phi: {
|
||||
struct PhiCounter {
|
||||
u16 seq_num;
|
||||
u8 unique_edge;
|
||||
u8 counter;
|
||||
};
|
||||
|
||||
PhiCounter count = inst->Flags<PhiCounter>();
|
||||
ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
|
||||
// the point of seq_num is to tell us if we've already traversed this
|
||||
// phi on the current walk. Alternatively we could keep a set of phi's
|
||||
// seen on the current walk. This is to handle phi cycles
|
||||
if (count.seq_num == 0) {
|
||||
// First time we've encountered this phi
|
||||
count.seq_num = seq_num;
|
||||
// Mark the phi as having been traversed originally through this edge
|
||||
count.unique_edge = use.operand;
|
||||
count.counter = inc;
|
||||
} else if (count.seq_num < seq_num) {
|
||||
count.seq_num = seq_num;
|
||||
// For now, assume we are visiting this phi via the same edge
|
||||
// as on other walks. If not, some dataflow analysis might be necessary
|
||||
ASSERT(count.unique_edge == use.operand);
|
||||
count.counter += inc;
|
||||
} else {
|
||||
// count.seq_num == seq_num
|
||||
// there's a cycle, and we've already been here on this walk
|
||||
return;
|
||||
}
|
||||
inst->SetFlags<PhiCounter>(count);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (IR::Use use : inst->Uses()) {
|
||||
MarkTessAttributeUsersHelper(use, inc);
|
||||
}
|
||||
}
|
||||
|
||||
u32 seq_num{1u};
|
||||
};
|
||||
|
||||
// Kind of LDS region a tessellation load/store touches.
// The numeric values are significant: GetAttributeRegionKind converts the counter stored in an
// instruction's flags directly into this enum.
enum class AttributeRegion : u32 {
    InputCP = 0,
    OutputCP = 1,
    PatchConst = 2,
};
|
||||
|
||||
// Classifies an LDS access from the counter stored in its flags.
//  - 0 marks: input control point region.
//  - passthrough hull shader: any marked access (at most 1 mark) is a PatchConst access.
//  - otherwise the mark count (at most 2) is the AttributeRegion value itself.
static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info) {
    const u32 marks = ring_access->Flags<u32>();
    if (marks == 0) {
        return AttributeRegion::InputCP;
    }

    const bool is_passthrough_hull = info.l_stage == LogicalStage::TessellationControl &&
                                     runtime_info.hs_info.IsPassthrough();
    if (is_passthrough_hull) {
        ASSERT(marks <= 1);
        return AttributeRegion::PatchConst;
    }

    ASSERT(marks <= 2);
    return AttributeRegion(marks);
}
|
||||
|
||||
// Conservative syntactic check that `term` is a multiple of `stride`.
// Recognised shapes:
//  - the immediate `stride` itself,
//  - a signed/unsigned bitfield extract of bits [0, 24) of a recognised term,
//  - a 32-bit multiply where either factor is a recognised term.
// Anything else is reported as not divisible.
static bool IsDivisibleByStride(IR::Value term, u32 stride) {
    if (MatchU32(stride).Match(term)) {
        return true;
    }

    IR::Value a, b;
    const bool is_low24_extract =
        M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) ||
        M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term);
    if (is_low24_extract) {
        return IsDivisibleByStride(a, stride);
    }

    if (!M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) {
        return false;
    }
    return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride);
}
|
||||
|
||||
// Return true if we can eliminate any addends
|
||||
static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<IR::U32>& addends) {
|
||||
IR::Value a, b;
|
||||
if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) {
|
||||
bool ret = false;
|
||||
ret = TryOptimizeAddendInModulo(a, stride, addends);
|
||||
ret |= TryOptimizeAddendInModulo(b, stride, addends);
|
||||
return ret;
|
||||
} else if (!IsDivisibleByStride(addend, stride)) {
|
||||
addends.push_back(IR::U32{addend});
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// In calculation (a + b + ...) % stride
|
||||
// Use this fact
|
||||
// (a + b) mod N = (a mod N + b mod N) mod N
|
||||
// If any addend is divisible by stride, then we can replace it with 0 in the attribute
|
||||
// or component index calculation
|
||||
static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) {
|
||||
std::vector<IR::U32> addends;
|
||||
if (TryOptimizeAddendInModulo(addr, stride, addends)) {
|
||||
addr = ir.Imm32(0);
|
||||
for (auto& addend : addends) {
|
||||
addr = ir.IAdd(addr, addend);
|
||||
}
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
// TODO: can optimize div in control point index similarly to mod
|
||||
|
||||
// Read a TCS input (InputCP region) or TES input (OutputCP region)
|
||||
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
|
||||
u32 off_dw) {
|
||||
if (off_dw > 0) {
|
||||
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||
}
|
||||
const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
|
||||
const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
|
||||
const IR::U32 attr_index =
|
||||
ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
|
||||
const IR::U32 comp_index =
|
||||
ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
||||
return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Rewrites the GCN-specific memory traffic of a hull (tessellation control) shader:
//  - globally coherent buffer stores (tess factor buffer writes) become SetPatch on the
//    tessellation factor patch attributes,
//  - LDS writes become output control point writes (SetTcsGenericAttribute) or PatchConst
//    writes (SetPatch on a generic patch attribute),
//  - LDS reads become input control point reads (GetTessGenericAttribute).
// For passthrough hull shaders, code copying every input attribute to the matching output
// attribute is additionally inserted after the prologue.
//
// The region of each LDS access is taken from the counter stored in the instruction's flags
// (see GetAttributeRegionKind).
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    const Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::StoreBufferU32:
            case IR::Opcode::StoreBufferU32x2:
            case IR::Opcode::StoreBufferU32x3:
            case IR::Opcode::StoreBufferU32x4: {
                const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
                if (!buffer_info.globally_coherent) {
                    break;
                }
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                // Returns the stored dword as a float, reusing the original float when the
                // dword was produced by a float->u32 bitcast.
                const auto GetValue = [&](IR::Value value) -> IR::F32 {
                    if (auto* producer = value.TryInstRecursive();
                        producer && producer->GetOpcode() == IR::Opcode::BitCastU32F32) {
                        return IR::F32{producer->Arg(0)};
                    }
                    return ir.BitCast<IR::F32, IR::U32>(IR::U32{value});
                };
                const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
                IR::U32 index = IR::U32{inst.Arg(1)};
                ASSERT(index.IsImmediate());
                // Byte offset into the tess factor buffer -> index of the factor being written
                const u32 gcn_factor_idx = (buffer_info.inst_offset.Value() + index.U32()) >> 2;

                const IR::Value data = inst.Arg(2);
                auto get_factor_attr = [&](u32 factor_idx) -> IR::Patch {
                    // The hull outputs tess factors in different formats depending on the shader.
                    // For triangle domains, it seems to pack the entries into 4 consecutive floats,
                    // with the 3 edge factors followed by the 1 interior factor.
                    // For quads, it does 4 edge factors then 2 interior.
                    // There is a tess factor stride member of the GNMX hull constants struct in
                    // a hull program shader binary archive, but this doesn't seem to be
                    // communicated to the driver.
                    // The layout seems to be implied by the type of the abstract domain.
                    switch (runtime_info.hs_info.tess_type) {
                    case AmdGpu::TessellationType::Isoline:
                        ASSERT(factor_idx < 2);
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Triangle:
                        ASSERT(factor_idx < 4);
                        if (factor_idx == 3) {
                            return IR::Patch::TessellationLodInteriorU;
                        }
                        return IR::PatchFactor(factor_idx);
                    case AmdGpu::TessellationType::Quad:
                        ASSERT(factor_idx < 6);
                        return IR::PatchFactor(factor_idx);
                    default:
                        UNREACHABLE();
                    }
                };

                inst.Invalidate();
                if (num_dwords == 1) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
                    break;
                }
                // Multi-dword stores must come from a composite so each dword can be split out
                auto* composite = data.TryInstRecursive();
                ASSERT(composite &&
                       (composite->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
                        composite->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
                for (u32 i = 0; i < num_dwords; i++) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx + i),
                                GetValue(composite->Arg(i)));
                }
                break;
            }

            case IR::Opcode::WriteSharedU32:
            case IR::Opcode::WriteSharedU64:
            case IR::Opcode::WriteSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
                const IR::U32 addr{inst.Arg(0)};
                const IR::U32 data{inst.Arg(1).Resolve()};

                // Emits the write of one dword. `addr` is the LDS byte address of the whole
                // access, `off_dw` the index of the dword within it.
                const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
                                           u32 off_dw) {
                    const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);

                    if (output_kind == AttributeRegion::OutputCP) {
                        if (off_dw > 0) {
                            // addr is in bytes: scale the dword index, otherwise the component
                            // index below would not change between dwords
                            addr = ir.IAdd(addr, ir.Imm32(off_dw * 4u));
                        }
                        u32 stride = runtime_info.hs_info.hs_output_cp_stride;
                        // Invocation ID array index is implicit, handled by SPIRV backend
                        const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
                        const IR::U32 attr_index = ir.ShiftRightLogical(
                            ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
                        const IR::U32 comp_index = ir.ShiftRightLogical(
                            ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
                        ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
                    } else {
                        ASSERT(output_kind == AttributeRegion::PatchConst);
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
                    }
                };

                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                if (num_dwords == 1) {
                    SetOutput(addr, data, region, 0);
                } else {
                    for (u32 i = 0; i < num_dwords; i++) {
                        SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
                    }
                }
                inst.Invalidate();
                break;
            }

            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                ASSERT_MSG(region == AttributeRegion::InputCP,
                           "Unhandled read of output or patchconst attribute in hull shader");
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(
                        ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component =
                            ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }

            default:
                break;
            }
        }
    }

    if (runtime_info.hs_info.IsPassthrough()) {
        // Copy input attributes to output attributes, indexed by InvocationID
        // Passthrough should imply that input and output patches have same number of vertices
        IR::Block* entry_block = *program.blocks.begin();
        auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
            return inst.GetOpcode() == IR::Opcode::Prologue;
        });
        ASSERT(it != entry_block->end());
        ++it;
        ASSERT(it != entry_block->end());
        ++it;
        // Prologue
        // SetExec #true
        // <- insert here
        // ...
        IR::IREmitter ir{*entry_block, it};

        ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
        u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
        const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
        for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) {
            for (u32 comp = 0; comp < 4; comp++) {
                IR::F32 attr_read =
                    ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp));
                // InvocationId is implicit index for output control point writes
                ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp));
            }
        }
        // We could wrap the rest of the program in an if stmt
        // CopyInputAttrsToOutputs(); // pseudocode
        // if (InvocationId == 0) {
        //     PatchConstFunction();
        // }
        // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally
    }
}
|
||||
|
||||
// Rewrites the LDS reads of a domain (tessellation evaluation) shader:
//  - reads classified as OutputCP (hull shader output control points) become
//    GetTessGenericAttribute reads using the hull output control point stride,
//  - reads classified as PatchConst become GetPatch on a generic patch attribute, which
//    requires the address to be an immediate.
// Any other region is a programming error for this stage.
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                // Emits the read of dword `off_dw` of the access at LDS byte address `addr`
                const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessInputComponent(
                            addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
                    } else {
                        ASSERT(region == AttributeRegion::PatchConst);
                        // Same requirement as PatchConst writes in the hull shader: the
                        // attribute index must be known at compile time
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
                    }
                };
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (u32 i = 0; i < num_dwords; i++) {
                        const IR::F32 component = GetInput(addr, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }
            default:
                break;
            }
        }
    }
}
|
||||
|
||||
// Run before either hull or domain transform
|
||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
TessellationDataConstantBuffer tess_constants;
|
||||
Shader::Info& info = program.info;
|
||||
// Find the TessellationDataConstantBuffer V#
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
auto found_tess_consts_sharp = [&]() -> bool {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::LoadSharedU128:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU128: {
|
||||
IR::Value addr = inst.Arg(0);
|
||||
auto read_const_buffer = IR::BreadthFirstSearch(
|
||||
addr, [](IR::Inst* maybe_tess_const) -> std::optional<IR::Inst*> {
|
||||
if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return maybe_tess_const;
|
||||
}
|
||||
return std::nullopt;
|
||||
});
|
||||
if (read_const_buffer) {
|
||||
auto sharp_location = FindTessConstantSharp(read_const_buffer.value());
|
||||
if (sharp_location) {
|
||||
if (info.tess_consts_dword_offset >= 0) {
|
||||
// It's possible there's a ReadConstBuffer that contributes to an
|
||||
// LDS address and isn't a TessConstant V# read. Could improve on
|
||||
// this somehow
|
||||
ASSERT_MSG(static_cast<s32>(sharp_location->dword_off) ==
|
||||
info.tess_consts_dword_offset &&
|
||||
sharp_location->ptr_base ==
|
||||
info.tess_consts_ptr_base,
|
||||
"TessConstants V# is ambiguous");
|
||||
}
|
||||
InitTessConstants(sharp_location->ptr_base,
|
||||
static_cast<s32>(sharp_location->dword_off), info,
|
||||
runtime_info, tess_constants);
|
||||
return true;
|
||||
}
|
||||
UNREACHABLE_MSG("Failed to match tess constant sharp");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}();
|
||||
|
||||
if (found_tess_consts_sharp) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(info.tess_consts_dword_offset >= 0);
|
||||
|
||||
TessConstantUseWalker walker;
|
||||
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
auto sharp_location = FindTessConstantSharp(&inst);
|
||||
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
|
||||
sharp_location->dword_off == info.tess_consts_dword_offset) {
|
||||
// The shader is reading from the TessConstants V#
|
||||
IR::Value index = inst.Arg(1);
|
||||
|
||||
ASSERT_MSG(index.IsImmediate(),
|
||||
"Tessellation constant read with dynamic index");
|
||||
u32 off_dw = index.U32();
|
||||
ASSERT(off_dw <=
|
||||
static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));
|
||||
|
||||
auto tess_const_attr = static_cast<TessConstantAttribute>(off_dw);
|
||||
switch (tess_const_attr) {
|
||||
case TessConstantAttribute::LsStride:
|
||||
// If not, we may need to make this runtime state for TES
|
||||
ASSERT(info.l_stage == LogicalStage::TessellationControl);
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride));
|
||||
break;
|
||||
case TessConstantAttribute::HsCpStride:
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride));
|
||||
break;
|
||||
case TessConstantAttribute::HsNumPatch:
|
||||
case TessConstantAttribute::HsOutputBase:
|
||||
case TessConstantAttribute::PatchConstBase:
|
||||
walker.MarkTessAttributeUsers(&inst, tess_const_attr);
|
||||
// We should be able to safely set these to 0 so that indexing happens only
|
||||
// within the local patch in the recompiled Vulkan shader. This assumes
|
||||
// these values only contribute to address calculations for in/out
|
||||
// attributes in the original gcn shader.
|
||||
// See the explanation for why we set V2 to 0 when emitting the prologue.
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||
break;
|
||||
case Shader::TessConstantAttribute::PatchConstSize:
|
||||
case Shader::TessConstantAttribute::PatchOutputSize:
|
||||
case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold:
|
||||
case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex:
|
||||
// May need to replace PatchConstSize and PatchOutputSize with 0
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Read past end of TessConstantsBuffer");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This pattern matching is necessary for now unless we support dynamic indexing of
|
||||
// PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single
|
||||
// vec4 array like in/out attrs. Not sure about tess factors.
|
||||
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||
// Replace the BFEs on V1 (packed with patch id within VGT and output cp id)
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) {
|
||||
IR::Inst& inst = *it;
|
||||
if (M_BITFIELDUEXTRACT(
|
||||
M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
|
||||
MatchIgnore()),
|
||||
MatchU32(0), MatchU32(8))
|
||||
.Match(IR::Value{&inst})) {
|
||||
IR::IREmitter emit(*block, it);
|
||||
// This is the patch id within the VGT, not the actual PrimitiveId
|
||||
// in the draw
|
||||
IR::Value replacement(0u);
|
||||
inst.ReplaceUsesWithAndRemove(replacement);
|
||||
} else if (M_BITFIELDUEXTRACT(
|
||||
M_GETATTRIBUTEU32(
|
||||
MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
|
||||
MatchIgnore()),
|
||||
MatchU32(8), MatchU32(5))
|
||||
.Match(IR::Value{&inst})) {
|
||||
IR::IREmitter ir(*block, it);
|
||||
IR::Value replacement;
|
||||
if (runtime_info.hs_info.IsPassthrough()) {
|
||||
// Deal with annoying pattern in BB where InvocationID use makes no
|
||||
// sense (in addr calculation for patchconst or tess factor write)
|
||||
replacement = ir.Imm32(0);
|
||||
} else {
|
||||
replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
|
||||
}
|
||||
inst.ReplaceUsesWithAndRemove(replacement);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage);
|
||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
@ -586,12 +586,13 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const auto dimensions =
|
||||
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||
: IR::Value{};
|
||||
const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(idx);
|
||||
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(coord_idx);
|
||||
if (unnormalized) {
|
||||
// Normalize the coordinate for sampling, dividing by its corresponding dimension.
|
||||
return ir.FPDiv(coord,
|
||||
ir.BitCast<IR::F32>(IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
|
||||
const auto dim =
|
||||
ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)});
|
||||
return ir.FPDiv(coord, dim);
|
||||
}
|
||||
return coord;
|
||||
};
|
||||
@ -771,14 +772,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2)));
|
||||
inst.SetArg(3, SwizzleVector(ir, image, inst.Arg(3)));
|
||||
}
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||
inst.SetArg(3, arg);
|
||||
inst.SetArg(2, arg);
|
||||
} else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
|
||||
inst.SetArg(4, arg);
|
||||
|
@ -1,11 +1,13 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
};
|
||||
|
||||
switch (stage) {
|
||||
case Stage::Local: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU32: {
|
||||
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
||||
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||
|
||||
u32 offset = 0;
|
||||
const auto* addr = inst.Arg(0).InstRecursive();
|
||||
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
ASSERT(addr->Arg(1).IsImmediate());
|
||||
offset = addr->Arg(1).U32();
|
||||
}
|
||||
IR::Value data = inst.Arg(1).Resolve();
|
||||
for (s32 i = 0; i < num_components; i++) {
|
||||
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||
const auto comp = (offset / 4) % 4;
|
||||
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
|
||||
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||
offset += 4;
|
||||
}
|
||||
inst.Invalidate();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
case Stage::Export: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::LoadBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
||||
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
|
||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||
const auto bucket = offset.Resolve().U32() / 256u;
|
||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||
@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||
case IR::Opcode::GetUserData:
|
||||
info.ud_mask.Set(inst.Arg(0).ScalarReg());
|
||||
break;
|
||||
case IR::Opcode::SetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
if (patch <= IR::Patch::TessellationLodBottom) {
|
||||
info.stores_tess_level_outer = true;
|
||||
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
|
||||
info.stores_tess_level_inner = true;
|
||||
} else {
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
|
28
src/shader_recompiler/ir/patch.cpp
Normal file
28
src/shader_recompiler/ir/patch.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
// Returns a printable name for a patch slot. The six fixed tessellation level slots map to
// their enumerator names; every other value is reported as "Component<N>", where N is the
// offset of the value from Patch::Component0.
std::string NameOf(Patch patch) {
    const char* fixed_name = nullptr;
    switch (patch) {
    case Patch::TessellationLodLeft:
        fixed_name = "TessellationLodLeft";
        break;
    case Patch::TessellationLodTop:
        fixed_name = "TessellationLodTop";
        break;
    case Patch::TessellationLodRight:
        fixed_name = "TessellationLodRight";
        break;
    case Patch::TessellationLodBottom:
        fixed_name = "TessellationLodBottom";
        break;
    case Patch::TessellationLodInteriorU:
        fixed_name = "TessellationLodInteriorU";
        break;
    case Patch::TessellationLodInteriorV:
        fixed_name = "TessellationLodInteriorV";
        break;
    default:
        break;
    }
    if (fixed_name != nullptr) {
        return fixed_name;
    }
    // Not one of the fixed slots: name it by its distance from the first generic component.
    const u32 component = u32(patch) - u32(Patch::Component0);
    return fmt::format("Component{}", component);
}
|
||||
|
||||
} // namespace Shader::IR
|
173
src/shader_recompiler/ir/patch.h
Normal file
173
src/shader_recompiler/ir/patch.h
Normal file
@ -0,0 +1,173 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
enum class Patch : u64 {
|
||||
TessellationLodLeft,
|
||||
TessellationLodTop,
|
||||
TessellationLodRight,
|
||||
TessellationLodBottom,
|
||||
TessellationLodInteriorU,
|
||||
TessellationLodInteriorV,
|
||||
Component0,
|
||||
Component1,
|
||||
Component2,
|
||||
Component3,
|
||||
Component4,
|
||||
Component5,
|
||||
Component6,
|
||||
Component7,
|
||||
Component8,
|
||||
Component9,
|
||||
Component10,
|
||||
Component11,
|
||||
Component12,
|
||||
Component13,
|
||||
Component14,
|
||||
Component15,
|
||||
Component16,
|
||||
Component17,
|
||||
Component18,
|
||||
Component19,
|
||||
Component20,
|
||||
Component21,
|
||||
Component22,
|
||||
Component23,
|
||||
Component24,
|
||||
Component25,
|
||||
Component26,
|
||||
Component27,
|
||||
Component28,
|
||||
Component29,
|
||||
Component30,
|
||||
Component31,
|
||||
Component32,
|
||||
Component33,
|
||||
Component34,
|
||||
Component35,
|
||||
Component36,
|
||||
Component37,
|
||||
Component38,
|
||||
Component39,
|
||||
Component40,
|
||||
Component41,
|
||||
Component42,
|
||||
Component43,
|
||||
Component44,
|
||||
Component45,
|
||||
Component46,
|
||||
Component47,
|
||||
Component48,
|
||||
Component49,
|
||||
Component50,
|
||||
Component51,
|
||||
Component52,
|
||||
Component53,
|
||||
Component54,
|
||||
Component55,
|
||||
Component56,
|
||||
Component57,
|
||||
Component58,
|
||||
Component59,
|
||||
Component60,
|
||||
Component61,
|
||||
Component62,
|
||||
Component63,
|
||||
Component64,
|
||||
Component65,
|
||||
Component66,
|
||||
Component67,
|
||||
Component68,
|
||||
Component69,
|
||||
Component70,
|
||||
Component71,
|
||||
Component72,
|
||||
Component73,
|
||||
Component74,
|
||||
Component75,
|
||||
Component76,
|
||||
Component77,
|
||||
Component78,
|
||||
Component79,
|
||||
Component80,
|
||||
Component81,
|
||||
Component82,
|
||||
Component83,
|
||||
Component84,
|
||||
Component85,
|
||||
Component86,
|
||||
Component87,
|
||||
Component88,
|
||||
Component89,
|
||||
Component90,
|
||||
Component91,
|
||||
Component92,
|
||||
Component93,
|
||||
Component94,
|
||||
Component95,
|
||||
Component96,
|
||||
Component97,
|
||||
Component98,
|
||||
Component99,
|
||||
Component100,
|
||||
Component101,
|
||||
Component102,
|
||||
Component103,
|
||||
Component104,
|
||||
Component105,
|
||||
Component106,
|
||||
Component107,
|
||||
Component108,
|
||||
Component109,
|
||||
Component110,
|
||||
Component111,
|
||||
Component112,
|
||||
Component113,
|
||||
Component114,
|
||||
Component115,
|
||||
Component116,
|
||||
Component117,
|
||||
Component118,
|
||||
Component119,
|
||||
};
|
||||
static_assert(static_cast<u64>(Patch::Component119) == 125);
|
||||
|
||||
// Reports whether `patch` lies in the generic component range
// [Patch::Component0, Patch::Component119].
constexpr bool IsGeneric(Patch patch) noexcept {
    if (patch < Patch::Component0) {
        return false;
    }
    return patch <= Patch::Component119;
}
|
||||
|
||||
// Converts a raw index to the Patch enumerator with the same numeric value (no offset applied).
constexpr Patch PatchFactor(u32 index) {
    const Patch factor = static_cast<Patch>(index);
    return factor;
}
|
||||
|
||||
// Returns the generic patch component that sits `index` slots after Patch::Component0.
constexpr Patch PatchGeneric(u32 index) {
    constexpr u32 first_generic = static_cast<u32>(Patch::Component0);
    return static_cast<Patch>(first_generic + index);
}
|
||||
|
||||
// Returns which group of four components `patch` belongs to, counted from Patch::Component0.
// The subtraction is unsigned, so values below Component0 wrap around rather than going negative.
constexpr u32 GenericPatchIndex(Patch patch) {
    constexpr u32 first_generic = static_cast<u32>(Patch::Component0);
    const u32 component = static_cast<u32>(patch) - first_generic;
    return component / 4;
}
|
||||
|
||||
// Returns the position (0-3) of `patch` inside its group of four components, counted from
// Patch::Component0.
constexpr u32 GenericPatchElement(Patch patch) {
    constexpr u32 first_generic = static_cast<u32>(Patch::Component0);
    const u32 component = static_cast<u32>(patch) - first_generic;
    return component % 4;
}
|
||||
|
||||
[[nodiscard]] std::string NameOf(Patch patch);
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::IR::Patch> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::IR::Patch patch, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
|
||||
}
|
||||
};
|
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
@ -0,0 +1,127 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
||||
namespace Shader::Optimiation::PatternMatching {
|
||||
|
||||
// Attempt at pattern matching for Insts and Values
|
||||
// Needs improvement, mostly a convenience
|
||||
|
||||
template <typename Derived>
|
||||
struct MatchObject {
|
||||
inline bool Match(IR::Value v) {
|
||||
return static_cast<Derived*>(this)->Match(v);
|
||||
}
|
||||
};
|
||||
|
||||
// Matches any value and captures it into the caller-provided reference.
struct MatchValue : MatchObject<MatchValue> {
    MatchValue(IR::Value& capture_) : capture(capture_) {}

    inline bool Match(IR::Value v) {
        capture = v;
        return true;
    }

private:
    // Destination owned by the caller; written on every Match() call.
    IR::Value& capture;
};
|
||||
|
||||
// Wildcard matcher: accepts any value and captures nothing.
struct MatchIgnore : MatchObject<MatchIgnore> {
    MatchIgnore() {}

    inline bool Match(IR::Value) {
        return true;
    }
};
|
||||
|
||||
// Matches only immediate values; on success the value is captured into the caller-provided
// reference. On failure the reference is left untouched.
struct MatchImm : MatchObject<MatchImm> {
    MatchImm(IR::Value& capture_) : capture(capture_) {}

    inline bool Match(IR::Value v) {
        const bool is_immediate = v.IsImmediate();
        if (is_immediate) {
            capture = v;
        }
        return is_immediate;
    }

private:
    // Destination owned by the caller; written only when the match succeeds.
    IR::Value& capture;
};
|
||||
|
||||
// Matches a value of type Attribute that equals the attribute given at construction.
struct MatchAttribute : MatchObject<MatchAttribute> {
    MatchAttribute(IR::Attribute expected_) : expected(expected_) {}

    inline bool Match(IR::Value v) {
        // Check the type first so Attribute() is only queried on attribute values.
        if (v.Type() != IR::Type::Attribute) {
            return false;
        }
        return v.Attribute() == expected;
    }

private:
    IR::Attribute expected;
};
|
||||
|
||||
// Matches an immediate U32 value equal to the constant given at construction.
struct MatchU32 : MatchObject<MatchU32> {
    MatchU32(u32 expected_) : expected(expected_) {}

    inline bool Match(IR::Value v) {
        // Same check order as a short-circuit chain: immediate, then type, then value.
        if (!v.IsImmediate()) {
            return false;
        }
        if (v.Type() != IR::Type::U32) {
            return false;
        }
        return v.U32() == expected;
    }

private:
    u32 expected;
};
|
||||
|
||||
template <IR::Opcode opcode, typename... Args>
|
||||
struct MatchInstObject : MatchObject<MatchInstObject<opcode>> {
|
||||
static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));
|
||||
MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}
|
||||
|
||||
inline bool Match(IR::Value v) {
|
||||
IR::Inst* inst = v.TryInstRecursive();
|
||||
if (!inst || inst->GetOpcode() != opcode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool matched = true;
|
||||
|
||||
[&]<std::size_t... Is>(std::index_sequence<Is...>) {
|
||||
((matched = matched && std::get<Is>(pattern).Match(inst->Arg(Is))), ...);
|
||||
}(std::make_index_sequence<sizeof...(Args)>{});
|
||||
|
||||
return matched;
|
||||
}
|
||||
|
||||
private:
|
||||
using MatchArgs = std::tuple<Args&...>;
|
||||
MatchArgs pattern;
|
||||
};
|
||||
|
||||
template <IR::Opcode opcode, typename... Args>
|
||||
inline auto MakeInstPattern(Args&&... args) {
|
||||
return MatchInstObject<opcode, Args...>(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Conveniences. TODO probably simpler way of doing this
|
||||
// Each M_<OPCODE>(...) macro expands to MakeInstPattern<IR::Opcode::<Opcode>>(...), forwarding
// its arguments as the sub-patterns for that instruction's operands.
#define M_READCONST(...) MakeInstPattern<IR::Opcode::ReadConst>(__VA_ARGS__)
|
||||
#define M_GETUSERDATA(...) MakeInstPattern<IR::Opcode::GetUserData>(__VA_ARGS__)
|
||||
#define M_BITFIELDUEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldUExtract>(__VA_ARGS__)
|
||||
#define M_BITFIELDSEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldSExtract>(__VA_ARGS__)
|
||||
#define M_GETATTRIBUTEU32(...) MakeInstPattern<IR::Opcode::GetAttributeU32>(__VA_ARGS__)
|
||||
#define M_UMOD32(...) MakeInstPattern<IR::Opcode::UMod32>(__VA_ARGS__)
|
||||
#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern<IR::Opcode::ShiftRightLogical32>(__VA_ARGS__)
|
||||
#define M_IADD32(...) MakeInstPattern<IR::Opcode::IAdd32>(__VA_ARGS__)
|
||||
#define M_IMUL32(...) MakeInstPattern<IR::Opcode::IMul32>(__VA_ARGS__)
|
||||
#define M_BITWISEAND32(...) MakeInstPattern<IR::Opcode::BitwiseAnd32>(__VA_ARGS__)
|
||||
#define M_GETTESSGENERICATTRIBUTE(...) \
|
||||
MakeInstPattern<IR::Opcode::GetTessGenericAttribute>(__VA_ARGS__)
|
||||
#define M_SETTCSGENERICATTRIBUTE(...) \
|
||||
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
|
||||
#define M_COMPOSITECONSTRUCTU32X2(...) \
|
||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
|
||||
#define M_COMPOSITECONSTRUCTU32X4(...) \
|
||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
||||
|
||||
} // namespace Shader::Optimiation::PatternMatching
|
@ -49,7 +49,8 @@ union BufferInstInfo {
|
||||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
||||
BitField<14, 1, u32> system_coherent;
|
||||
BitField<15, 1, u32> globally_coherent;
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
|
@ -15,7 +15,7 @@ enum class Type {
|
||||
ScalarReg = 1 << 1,
|
||||
VectorReg = 1 << 2,
|
||||
Attribute = 1 << 3,
|
||||
SystemValue = 1 << 4,
|
||||
Patch = 1 << 4,
|
||||
U1 = 1 << 5,
|
||||
U8 = 1 << 6,
|
||||
U16 = 1 << 7,
|
||||
|
@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
|
||||
|
||||
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
|
||||
|
||||
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
|
||||
|
||||
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
|
||||
|
||||
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
|
||||
@ -34,6 +35,7 @@ public:
|
||||
explicit Value(IR::ScalarReg reg) noexcept;
|
||||
explicit Value(IR::VectorReg reg) noexcept;
|
||||
explicit Value(IR::Attribute value) noexcept;
|
||||
explicit Value(IR::Patch patch) noexcept;
|
||||
explicit Value(bool value) noexcept;
|
||||
explicit Value(u8 value) noexcept;
|
||||
explicit Value(u16 value) noexcept;
|
||||
@ -56,6 +58,7 @@ public:
|
||||
[[nodiscard]] IR::ScalarReg ScalarReg() const;
|
||||
[[nodiscard]] IR::VectorReg VectorReg() const;
|
||||
[[nodiscard]] IR::Attribute Attribute() const;
|
||||
[[nodiscard]] IR::Patch Patch() const;
|
||||
[[nodiscard]] bool U1() const;
|
||||
[[nodiscard]] u8 U8() const;
|
||||
[[nodiscard]] u16 U16() const;
|
||||
@ -75,6 +78,7 @@ private:
|
||||
IR::ScalarReg sreg;
|
||||
IR::VectorReg vreg;
|
||||
IR::Attribute attribute;
|
||||
IR::Patch patch;
|
||||
bool imm_u1;
|
||||
u8 imm_u8;
|
||||
u16 imm_u16;
|
||||
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
|
||||
return attribute;
|
||||
}
|
||||
|
||||
inline IR::Patch Value::Patch() const {
|
||||
DEBUG_ASSERT(type == Type::Patch);
|
||||
return patch;
|
||||
}
|
||||
|
||||
inline bool Value::U1() const {
|
||||
if (IsIdentity()) {
|
||||
return inst->Arg(0).U1();
|
||||
|
@ -23,6 +23,7 @@ struct Profile {
|
||||
bool support_fp32_denorm_flush{};
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool support_legacy_vertex_attributes{};
|
||||
bool supports_image_load_store_lod{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool needs_manual_interpolation{};
|
||||
|
@ -1,6 +1,9 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/frontend/control_flow_graph.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/frontend/structured_control_flow.h"
|
||||
@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
||||
}
|
||||
|
||||
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||
RuntimeInfo& runtime_info, const Profile& profile) {
|
||||
// Ensure first instruction is expected.
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
if (code[0] != token_mov_vcchi) {
|
||||
@ -60,12 +63,29 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||
|
||||
// Run optimization passes
|
||||
const auto stage = program.info.stage;
|
||||
|
||||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||
// Tess passes require previous const prop passes for now (for simplicity). TODO allow
|
||||
// fine grained folding or opportunistic folding we set an operand to an immediate
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::TessellationPreprocess(program, runtime_info);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::HullShaderTransform(program, runtime_info);
|
||||
} else if (info.l_stage == LogicalStage::TessellationEval) {
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::TessellationPreprocess(program, runtime_info);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::DomainShaderTransform(program, runtime_info);
|
||||
}
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
if (program.info.stage != Stage::Compute) {
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
||||
if (stage != Stage::Compute) {
|
||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||
}
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::FlattenExtendedUserdataPass(program);
|
||||
Shader::Optimization::ResourceTrackingPass(program);
|
||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
|
@ -28,6 +28,6 @@ struct Pools {
|
||||
};
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||
const RuntimeInfo& runtime_info, const Profile& profile);
|
||||
RuntimeInfo& runtime_info, const Profile& profile);
|
||||
|
||||
} // namespace Shader
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <span>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/frontend/tessellation.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
@ -21,12 +22,31 @@ enum class Stage : u32 {
|
||||
Local,
|
||||
Compute,
|
||||
};
|
||||
constexpr u32 MaxStageTypes = 7;
|
||||
|
||||
// Vertex intentionally comes after TCS/TES due to order of compilation
|
||||
enum class LogicalStage : u32 {
|
||||
Fragment,
|
||||
TessellationControl,
|
||||
TessellationEval,
|
||||
Vertex,
|
||||
Geometry,
|
||||
Compute,
|
||||
NumLogicalStages
|
||||
};
|
||||
|
||||
constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
|
||||
|
||||
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||
return static_cast<Stage>(index);
|
||||
}
|
||||
|
||||
// Runtime state for the Local stage. RuntimeInfo stores it as `ls_info` and compares it when the
// stage is Stage::Local; all members take part in the defaulted comparison below.
// NOTE(review): `ls_stride` presumably mirrors the tess constants' ls_stride and
// `links_with_tcs` presumably flags an LS that feeds a tessellation control shader; confirm
// against the code that fills this struct.
struct LocalRuntimeInfo {
|
||||
u32 ls_stride;
|
||||
bool links_with_tcs;
|
||||
|
||||
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
struct ExportRuntimeInfo {
|
||||
u32 vertex_data_size;
|
||||
|
||||
@ -64,9 +84,57 @@ struct VertexRuntimeInfo {
|
||||
u32 num_outputs;
|
||||
std::array<VsOutputMap, 3> outputs;
|
||||
bool emulate_depth_negative_one_to_one{};
|
||||
// Domain
|
||||
AmdGpu::TessellationType tess_type;
|
||||
AmdGpu::TessellationTopology tess_topology;
|
||||
AmdGpu::TessellationPartitioning tess_partitioning;
|
||||
u32 hs_output_cp_stride{};
|
||||
|
||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
|
||||
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
|
||||
tess_type == other.tess_type && tess_topology == other.tess_topology &&
|
||||
tess_partitioning == other.tess_partitioning &&
|
||||
hs_output_cp_stride == other.hs_output_cp_stride;
|
||||
}
|
||||
|
||||
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||
}
|
||||
};
|
||||
|
||||
struct HullRuntimeInfo {
|
||||
// from registers
|
||||
u32 num_input_control_points;
|
||||
u32 num_threads;
|
||||
AmdGpu::TessellationType tess_type;
|
||||
|
||||
// from tess constants buffer
|
||||
u32 ls_stride;
|
||||
u32 hs_output_cp_stride;
|
||||
u32 hs_output_base;
|
||||
|
||||
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
|
||||
|
||||
// It might be possible for a non-passthrough TCS to have these conditions, in some
|
||||
// dumb situation.
|
||||
// In that case, it should be fine to assume passthrough and declare some extra
|
||||
// output control points and attributes that shouldnt be read by the TES anyways
|
||||
bool IsPassthrough() const {
|
||||
return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
|
||||
};
|
||||
|
||||
// regs.ls_hs_config.hs_output_control_points contains the number of threads, which
|
||||
// isn't exactly the number of output control points.
|
||||
// For passthrough shaders, the register field is set to 1, so use the number of
|
||||
// input control points
|
||||
u32 NumOutputControlPoints() const {
|
||||
return IsPassthrough() ? num_input_control_points : num_threads;
|
||||
}
|
||||
|
||||
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||
ls_stride = tess_constants.ls_stride;
|
||||
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||
hs_output_base = tess_constants.hs_output_base;
|
||||
}
|
||||
};
|
||||
|
||||
@ -150,8 +218,10 @@ struct RuntimeInfo {
|
||||
AmdGpu::FpDenormMode fp_denorm_mode32;
|
||||
AmdGpu::FpRoundMode fp_round_mode32;
|
||||
union {
|
||||
LocalRuntimeInfo ls_info;
|
||||
ExportRuntimeInfo es_info;
|
||||
VertexRuntimeInfo vs_info;
|
||||
HullRuntimeInfo hs_info;
|
||||
GeometryRuntimeInfo gs_info;
|
||||
FragmentRuntimeInfo fs_info;
|
||||
ComputeRuntimeInfo cs_info;
|
||||
@ -174,6 +244,10 @@ struct RuntimeInfo {
|
||||
return es_info == other.es_info;
|
||||
case Stage::Geometry:
|
||||
return gs_info == other.gs_info;
|
||||
case Stage::Hull:
|
||||
return hs_info == other.hs_info;
|
||||
case Stage::Local:
|
||||
return ls_info == other.ls_info;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -127,6 +127,18 @@ struct StageSpecialization {
|
||||
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
|
||||
spec.force_unnormalized = sharp.force_unnormalized;
|
||||
});
|
||||
|
||||
// Initialize runtime_info fields that rely on analysis in tessellation passes
|
||||
if (info->l_stage == LogicalStage::TessellationControl ||
|
||||
info->l_stage == LogicalStage::TessellationEval) {
|
||||
Shader::TessellationDataConstantBuffer tess_constants;
|
||||
info->ReadTessConstantBuffer(tess_constants);
|
||||
if (info->l_stage == LogicalStage::TessellationControl) {
|
||||
runtime_info.hs_info.InitFromTessConstants(tess_constants);
|
||||
} else {
|
||||
runtime_info.vs_info.InitFromTessConstants(tess_constants);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||
|
@ -1,6 +1,8 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/preprocessor/stringize.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
@ -18,7 +20,32 @@ namespace AmdGpu {
|
||||
|
||||
static const char* dcb_task_name{"DCB_TASK"};
|
||||
static const char* ccb_task_name{"CCB_TASK"};
|
||||
static const char* acb_task_name{"ACB_TASK"};
|
||||
|
||||
#define MAX_NAMES 56
|
||||
static_assert(Liverpool::NumComputeRings <= MAX_NAMES);
|
||||
|
||||
#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n),
|
||||
#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)}
|
||||
|
||||
static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
|
||||
|
||||
#define YIELD(name) \
|
||||
FIBER_EXIT; \
|
||||
co_yield {}; \
|
||||
FIBER_ENTER(name);
|
||||
|
||||
#define YIELD_CE() YIELD(ccb_task_name)
|
||||
#define YIELD_GFX() YIELD(dcb_task_name)
|
||||
#define YIELD_ASC(id) YIELD(acb_task_name[id])
|
||||
|
||||
#define RESUME(task, name) \
|
||||
FIBER_EXIT; \
|
||||
task.handle.resume(); \
|
||||
FIBER_ENTER(name);
|
||||
|
||||
#define RESUME_CE(task) RESUME(task, ccb_task_name)
|
||||
#define RESUME_GFX(task) RESUME(task, dcb_task_name)
|
||||
#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id])
|
||||
|
||||
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
||||
|
||||
@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
|
||||
VideoCore::StartCapture();
|
||||
|
||||
int qid = -1;
|
||||
curr_qid = -1;
|
||||
|
||||
while (num_submits || num_commands) {
|
||||
|
||||
@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
--num_commands;
|
||||
}
|
||||
|
||||
qid = (qid + 1) % NumTotalQueues;
|
||||
curr_qid = (curr_qid + 1) % num_mapped_queues;
|
||||
|
||||
auto& queue = mapped_queues[qid];
|
||||
auto& queue = mapped_queues[curr_qid];
|
||||
|
||||
Task::Handle task{};
|
||||
{
|
||||
@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
}
|
||||
|
||||
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||
TracyFiberEnter(ccb_task_name);
|
||||
FIBER_ENTER(ccb_task_name);
|
||||
|
||||
while (!ccb.empty()) {
|
||||
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
|
||||
@ -155,9 +182,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||
case PM4ItOpcode::WaitOnDeCounterDiff: {
|
||||
const auto diff = it_body[0];
|
||||
while ((cblock.de_count - cblock.ce_count) >= diff) {
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(ccb_task_name);
|
||||
YIELD_CE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||
auto task =
|
||||
ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
|
||||
while (!task.handle.done()) {
|
||||
task.handle.resume();
|
||||
RESUME_CE(task);
|
||||
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(ccb_task_name);
|
||||
};
|
||||
while (!task.handle.done()) {
|
||||
YIELD_CE();
|
||||
RESUME_CE(task);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||
ccb = NextPacket(ccb, header->type3.NumWords() + 1);
|
||||
}
|
||||
|
||||
TracyFiberLeave;
|
||||
FIBER_EXIT;
|
||||
}
|
||||
|
||||
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
FIBER_ENTER(dcb_task_name);
|
||||
|
||||
cblock.Reset();
|
||||
|
||||
@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
if (!ccb.empty()) {
|
||||
// In case of CCB provided kick off CE asap to have the constant heap ready to use
|
||||
ce_task = ProcessCeUpdate(ccb);
|
||||
TracyFiberLeave;
|
||||
ce_task.handle.resume();
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
RESUME_GFX(ce_task);
|
||||
}
|
||||
|
||||
const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());
|
||||
@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
const auto set_size = (count - 1) * sizeof(u32);
|
||||
|
||||
if (set_data->reg_offset >= 0x200 &&
|
||||
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
|
||||
(set_data->reg_offset - 0x200);
|
||||
std::memcpy(addr, header + 2, set_size);
|
||||
} else {
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
set_size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetUconfigReg: {
|
||||
@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
auto& cs_program = GetCsRegs();
|
||||
cs_program.dim_x = dispatch_direct->dim_x;
|
||||
cs_program.dim_y = dispatch_direct->dim_y;
|
||||
cs_program.dim_z = dispatch_direct->dim_z;
|
||||
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
|
||||
true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::DispatchIndirect: {
|
||||
const auto* dispatch_indirect =
|
||||
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||
auto& cs_program = GetCsRegs();
|
||||
const auto offset = dispatch_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
|
||||
true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
||||
@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::Rewind: {
|
||||
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
||||
while (!rewind->Valid()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
YIELD_GFX();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -629,15 +659,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
||||
if (vo_port->IsVoLabel(wait_addr) &&
|
||||
num_submits == mapped_queues[GfxQueueId].submits.size()) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
YIELD_GFX();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -645,13 +672,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||
auto task = ProcessGraphics(
|
||||
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
|
||||
while (!task.handle.done()) {
|
||||
task.handle.resume();
|
||||
RESUME_GFX(task);
|
||||
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
};
|
||||
while (!task.handle.done()) {
|
||||
YIELD_GFX();
|
||||
RESUME_GFX(task);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IncrementDeCounter: {
|
||||
@ -660,9 +686,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
}
|
||||
case PM4ItOpcode::WaitOnCeCounter: {
|
||||
while (cblock.ce_count <= cblock.de_count) {
|
||||
TracyFiberLeave;
|
||||
ce_task.handle.resume();
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
RESUME_GFX(ce_task);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -686,11 +710,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
ce_task.handle.destroy();
|
||||
}
|
||||
|
||||
TracyFiberLeave;
|
||||
FIBER_EXIT;
|
||||
}
|
||||
|
||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
TracyFiberEnter(acb_task_name);
|
||||
template <bool is_indirect>
|
||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||
FIBER_ENTER(acb_task_name[vqid]);
|
||||
const auto& queue = asc_queues[{vqid}];
|
||||
|
||||
auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
|
||||
while (!acb.empty()) {
|
||||
@ -711,15 +737,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
}
|
||||
case PM4ItOpcode::IndirectBuffer: {
|
||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||
auto task = ProcessCompute(
|
||||
auto task = ProcessCompute<true>(
|
||||
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
|
||||
while (!task.handle.done()) {
|
||||
task.handle.resume();
|
||||
RESUME_ASC(task, vqid);
|
||||
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
};
|
||||
while (!task.handle.done()) {
|
||||
YIELD_ASC(vqid);
|
||||
RESUME_ASC(task, vqid);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
@ -757,30 +782,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
case PM4ItOpcode::Rewind: {
|
||||
const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
|
||||
while (!rewind->Valid()) {
|
||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
||||
YIELD_ASC(vqid);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
const auto set_size = (count - 1) * sizeof(u32);
|
||||
|
||||
if (set_data->reg_offset >= 0x200 &&
|
||||
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||
auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
|
||||
(set_data->reg_offset - 0x200);
|
||||
std::memcpy(addr, header + 2, set_size);
|
||||
} else {
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
set_size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
auto& cs_program = GetCsRegs();
|
||||
cs_program.dim_x = dispatch_direct->dim_x;
|
||||
cs_program.dim_y = dispatch_direct->dim_y;
|
||||
cs_program.dim_z = dispatch_direct->dim_z;
|
||||
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs, true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
@ -803,17 +836,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
||||
YIELD_ASC(vqid);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::ReleaseMem: {
|
||||
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
||||
release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
|
||||
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -821,10 +850,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
static_cast<u32>(opcode), count);
|
||||
}
|
||||
|
||||
acb = NextPacket(acb, header->type3.NumWords() + 1);
|
||||
const auto packet_size_dw = header->type3.NumWords() + 1;
|
||||
acb = NextPacket(acb, packet_size_dw);
|
||||
|
||||
if constexpr (!is_indirect) {
|
||||
*queue.read_addr += packet_size_dw;
|
||||
*queue.read_addr %= queue.ring_size_dw;
|
||||
}
|
||||
}
|
||||
|
||||
TracyFiberLeave;
|
||||
FIBER_EXIT;
|
||||
}
|
||||
|
||||
std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
|
||||
@ -881,10 +916,11 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
submit_cv.notify_one();
|
||||
}
|
||||
|
||||
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
||||
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||
auto& queue = mapped_queues[vqid];
|
||||
void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
|
||||
ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||
auto& queue = mapped_queues[gnm_vqid];
|
||||
|
||||
const auto vqid = gnm_vqid - 1;
|
||||
const auto& task = ProcessCompute(acb, vqid);
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
@ -892,6 +928,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
||||
}
|
||||
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
|
||||
++num_submits;
|
||||
submit_cv.notify_one();
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/slot_vector.h"
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
@ -45,7 +46,8 @@ struct Liverpool {
|
||||
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
||||
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
||||
static constexpr u32 NumQueuesPerPipe = 8u;
|
||||
static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe);
|
||||
static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe;
|
||||
static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings;
|
||||
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
|
||||
|
||||
static constexpr u32 NumColorBuffers = 8;
|
||||
@ -143,6 +145,13 @@ struct Liverpool {
|
||||
}
|
||||
};
|
||||
|
||||
struct HsTessFactorClamp {
|
||||
// I've only seen min=0.0, max=1.0 so far.
|
||||
// TODO why is max set to 1.0? Makes no sense
|
||||
float hs_max_tess;
|
||||
float hs_min_tess;
|
||||
};
|
||||
|
||||
struct ComputeProgram {
|
||||
u32 dispatch_initiator;
|
||||
u32 dim_x;
|
||||
@ -956,6 +965,7 @@ struct Liverpool {
|
||||
enum VgtStages : u32 {
|
||||
Vs = 0u, // always enabled
|
||||
EsGs = 0xB0u,
|
||||
LsHs = 0x45u,
|
||||
};
|
||||
|
||||
VgtStages raw;
|
||||
@ -963,7 +973,8 @@ struct Liverpool {
|
||||
BitField<2, 1, u32> hs_en;
|
||||
BitField<3, 2, u32> es_en;
|
||||
BitField<5, 1, u32> gs_en;
|
||||
BitField<6, 1, u32> vs_en;
|
||||
BitField<6, 2, u32> vs_en;
|
||||
BitField<8, 1, u32> dynamic_hs;
|
||||
|
||||
bool IsStageEnabled(u32 stage) const {
|
||||
switch (stage) {
|
||||
@ -1059,6 +1070,28 @@ struct Liverpool {
|
||||
};
|
||||
};
|
||||
|
||||
union LsHsConfig {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> num_patches;
|
||||
BitField<8, 6, u32> hs_input_control_points;
|
||||
BitField<14, 6, u32> hs_output_control_points;
|
||||
};
|
||||
|
||||
union TessellationConfig {
|
||||
u32 raw;
|
||||
BitField<0, 2, TessellationType> type;
|
||||
BitField<2, 3, TessellationPartitioning> partitioning;
|
||||
BitField<5, 3, TessellationTopology> topology;
|
||||
};
|
||||
|
||||
union TessFactorMemoryBase {
|
||||
u32 base;
|
||||
|
||||
u64 MemoryBase() const {
|
||||
return static_cast<u64>(base) << 8;
|
||||
}
|
||||
};
|
||||
|
||||
union Eqaa {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> max_anchor_samples;
|
||||
@ -1109,10 +1142,10 @@ struct Liverpool {
|
||||
ShaderProgram es_program;
|
||||
INSERT_PADDING_WORDS(0x2C);
|
||||
ShaderProgram hs_program;
|
||||
INSERT_PADDING_WORDS(0x2C);
|
||||
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
|
||||
ShaderProgram ls_program;
|
||||
INSERT_PADDING_WORDS(0xA4);
|
||||
ComputeProgram cs_program;
|
||||
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
||||
DepthRenderControl depth_render_control;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
@ -1176,7 +1209,9 @@ struct Liverpool {
|
||||
PolygonControl polygon_control;
|
||||
ViewportControl viewport_control;
|
||||
VsOutputControl vs_output_control;
|
||||
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
|
||||
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
|
||||
HsTessFactorClamp hs_clamp;
|
||||
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
|
||||
GsMode vgt_gs_mode;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
ModeControl mode_control;
|
||||
@ -1200,9 +1235,10 @@ struct Liverpool {
|
||||
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
||||
ShaderStageEnable stage_enable;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
LsHsConfig ls_hs_config;
|
||||
u32 vgt_gs_vert_itemsize[4];
|
||||
INSERT_PADDING_WORDS(4);
|
||||
TessellationConfig tess_config;
|
||||
INSERT_PADDING_WORDS(3);
|
||||
PolygonOffset poly_offset;
|
||||
GsInstances vgt_gs_instance_cnt;
|
||||
StreamOutConfig vgt_strmout_config;
|
||||
@ -1216,6 +1252,8 @@ struct Liverpool {
|
||||
INSERT_PADDING_WORDS(0xC24C - 0xC243);
|
||||
u32 num_indices;
|
||||
VgtNumInstances num_instances;
|
||||
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
|
||||
TessFactorMemoryBase vgt_tf_memory_base;
|
||||
};
|
||||
std::array<u32, NumRegs> reg_array{};
|
||||
|
||||
@ -1262,7 +1300,7 @@ public:
|
||||
~Liverpool();
|
||||
|
||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
||||
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
|
||||
|
||||
void SubmitDone() noexcept {
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
@ -1305,6 +1343,18 @@ public:
|
||||
gfx_queue.dcb_buffer.reserve(GfxReservedSize);
|
||||
}
|
||||
|
||||
inline ComputeProgram& GetCsRegs() {
|
||||
return mapped_queues[curr_qid].cs_state;
|
||||
}
|
||||
|
||||
struct AscQueueInfo {
|
||||
VAddr map_addr;
|
||||
u32* read_addr;
|
||||
u32 ring_size_dw;
|
||||
u32 pipe_id;
|
||||
};
|
||||
Common::SlotVector<AscQueueInfo> asc_queues{};
|
||||
|
||||
private:
|
||||
struct Task {
|
||||
struct promise_type {
|
||||
@ -1342,7 +1392,8 @@ private:
|
||||
std::span<const u32> ccb);
|
||||
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||
Task ProcessCeUpdate(std::span<const u32> ccb);
|
||||
Task ProcessCompute(std::span<const u32> acb, int vqid);
|
||||
template <bool is_indirect = false>
|
||||
Task ProcessCompute(std::span<const u32> acb, u32 vqid);
|
||||
|
||||
void Process(std::stop_token stoken);
|
||||
|
||||
@ -1357,6 +1408,7 @@ private:
|
||||
VAddr indirect_args_addr{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
u32 num_mapped_queues{1u}; // GFX is always available
|
||||
|
||||
struct ConstantEngine {
|
||||
void Reset() {
|
||||
@ -1385,6 +1437,7 @@ private:
|
||||
std::mutex submit_mutex;
|
||||
std::condition_variable_any submit_cv;
|
||||
std::queue<Common::UniqueFunction<void>> command_queue{};
|
||||
int curr_qid{-1};
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
@ -1431,6 +1484,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
|
||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
||||
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
|
||||
@ -1445,6 +1499,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
||||
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
||||
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
||||
@ -1456,6 +1511,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
|
||||
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
|
||||
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
|
||||
|
||||
#undef GFX6_3D_REG_INDEX
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
@ -21,6 +23,69 @@ enum class FpDenormMode : u32 {
|
||||
InOutAllow = 3,
|
||||
};
|
||||
|
||||
enum class TessellationType : u32 {
|
||||
Isoline = 0,
|
||||
Triangle = 1,
|
||||
Quad = 2,
|
||||
};
|
||||
|
||||
constexpr std::string_view NameOf(TessellationType type) {
|
||||
switch (type) {
|
||||
case TessellationType::Isoline:
|
||||
return "Isoline";
|
||||
case TessellationType::Triangle:
|
||||
return "Triangle";
|
||||
case TessellationType::Quad:
|
||||
return "Quad";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
enum class TessellationPartitioning : u32 {
|
||||
Integer = 0,
|
||||
Pow2 = 1,
|
||||
FracOdd = 2,
|
||||
FracEven = 3,
|
||||
};
|
||||
|
||||
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
|
||||
switch (partitioning) {
|
||||
case TessellationPartitioning::Integer:
|
||||
return "Integer";
|
||||
case TessellationPartitioning::Pow2:
|
||||
return "Pow2";
|
||||
case TessellationPartitioning::FracOdd:
|
||||
return "FracOdd";
|
||||
case TessellationPartitioning::FracEven:
|
||||
return "FracEven";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
enum class TessellationTopology : u32 {
|
||||
Point = 0,
|
||||
Line = 1,
|
||||
TriangleCw = 2,
|
||||
TriangleCcw = 3,
|
||||
};
|
||||
|
||||
constexpr std::string_view NameOf(TessellationTopology topology) {
|
||||
switch (topology) {
|
||||
case TessellationTopology::Point:
|
||||
return "Point";
|
||||
case TessellationTopology::Line:
|
||||
return "Line";
|
||||
case TessellationTopology::TriangleCw:
|
||||
return "TriangleCw";
|
||||
case TessellationTopology::TriangleCcw:
|
||||
return "TriangleCcw";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
||||
enum class PrimitiveType : u32 {
|
||||
None = 0,
|
||||
@ -118,3 +183,33 @@ enum class NumberFormat : u32 {
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::TessellationType> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(AmdGpu::TessellationType type, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::TessellationTopology> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||
}
|
||||
};
|
||||
|
@ -3,10 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(__APPLE__) && !USE_SYSTEM_VULKAN_LOADER
|
||||
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
|
||||
#endif
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_ENABLE_BETA_EXTENSIONS
|
||||
#define VK_NO_PROTOTYPES
|
||||
|
@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
ComputePipelineKey compute_key_, const Shader::Info& info_,
|
||||
vk::ShaderModule module)
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
||||
auto& info = stages[int(Shader::Stage::Compute)];
|
||||
auto& info = stages[int(Shader::LogicalStage::Compute)];
|
||||
info = &info_;
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
const auto& vs_info = GetStage(Shader::Stage::Vertex);
|
||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader
|
||||
@ -106,6 +107,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
key.primitive_restart_index == 0xFFFFFFFF,
|
||||
"Primitive restart index other than -1 is not supported yet");
|
||||
|
||||
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
|
||||
.patchControlPoints = key.patch_control_points,
|
||||
};
|
||||
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||
.depthClampEnable = false,
|
||||
.rasterizerDiscardEnable = false,
|
||||
@ -204,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
|
||||
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
|
||||
shader_stages;
|
||||
auto stage = u32(Shader::Stage::Vertex);
|
||||
auto stage = u32(Shader::LogicalStage::Vertex);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eVertex,
|
||||
@ -212,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Geometry);
|
||||
stage = u32(Shader::LogicalStage::Geometry);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eGeometry,
|
||||
@ -220,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Fragment);
|
||||
stage = u32(Shader::LogicalStage::TessellationControl);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationControl,
|
||||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::LogicalStage::TessellationEval);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
|
||||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::LogicalStage::Fragment);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eFragment,
|
||||
@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pTessellationState =
|
||||
stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
|
||||
.pViewportState = &viewport_info,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
||||
if (!stage) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (stage->has_readconst) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
|
@ -52,6 +52,7 @@ struct GraphicsPipelineKey {
|
||||
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
||||
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
|
||||
u32 patch_control_points;
|
||||
|
||||
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
||||
return std::memcmp(this, &key, sizeof(key)) == 0;
|
||||
@ -73,7 +74,7 @@ public:
|
||||
|
||||
bool IsEmbeddedVs() const noexcept {
|
||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
|
||||
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
|
||||
}
|
||||
|
||||
auto GetWriteMasks() const {
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -68,11 +69,10 @@ std::unordered_map<vk::Format, vk::FormatProperties3> GetFormatProperties(
|
||||
}
|
||||
// Other miscellaneous formats, e.g. for color buffers, swizzles, or compatibility
|
||||
static constexpr std::array misc_formats = {
|
||||
vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32,
|
||||
vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint,
|
||||
vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb,
|
||||
vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eA2R10G10B10UnormPack32,
|
||||
vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eB8G8R8A8Srgb,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
};
|
||||
for (const auto& format : misc_formats) {
|
||||
if (!format_properties.contains(format)) {
|
||||
@ -262,11 +262,13 @@ bool Instance::CreateDevice() {
|
||||
// The next two extensions are required to be available together in order to support write masks
|
||||
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
|
||||
const bool calibrated_timestamps =
|
||||
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
|
||||
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||
|
||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||
// with extensions.
|
||||
@ -327,6 +329,7 @@ bool Instance::CreateDevice() {
|
||||
.imageCubeArray = features.imageCubeArray,
|
||||
.independentBlend = features.independentBlend,
|
||||
.geometryShader = features.geometryShader,
|
||||
.tessellationShader = features.tessellationShader,
|
||||
.logicOp = features.logicOp,
|
||||
.depthBiasClamp = features.depthBiasClamp,
|
||||
.fillModeNonSolid = features.fillModeNonSolid,
|
||||
@ -580,42 +583,22 @@ bool Instance::IsFormatSupported(const vk::Format format,
|
||||
return (GetFormatFeatureFlags(format) & flags) == flags;
|
||||
}
|
||||
|
||||
static vk::Format GetAlternativeFormat(const vk::Format format) {
|
||||
switch (format) {
|
||||
case vk::Format::eB5G6R5UnormPack16:
|
||||
return vk::Format::eR5G6B5UnormPack16;
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
return vk::Format::eD24UnormS8Uint;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
vk::Format Instance::GetSupportedFormat(const vk::Format format,
|
||||
const vk::FormatFeatureFlags2 flags) const {
|
||||
if (IsFormatSupported(format, flags)) [[likely]] {
|
||||
return format;
|
||||
}
|
||||
const vk::Format alternative = GetAlternativeFormat(format);
|
||||
if (IsFormatSupported(alternative, flags)) [[likely]] {
|
||||
return alternative;
|
||||
if (!IsFormatSupported(format, flags)) [[unlikely]] {
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
if (IsFormatSupported(vk::Format::eD24UnormS8Uint, flags)) {
|
||||
return vk::Format::eD24UnormS8Uint;
|
||||
}
|
||||
if (IsFormatSupported(vk::Format::eD32SfloatS8Uint, flags)) {
|
||||
return vk::Format::eD32SfloatS8Uint;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
vk::ComponentMapping Instance::GetSupportedComponentSwizzle(
|
||||
const vk::Format format, const vk::ComponentMapping swizzle,
|
||||
const vk::FormatFeatureFlags2 flags) const {
|
||||
if (IsFormatSupported(format, flags)) [[likely]] {
|
||||
return swizzle;
|
||||
}
|
||||
|
||||
vk::ComponentMapping supported_swizzle = swizzle;
|
||||
if (format == vk::Format::eB5G6R5UnormPack16) {
|
||||
// B5G6R5 -> R5G6B5
|
||||
std::swap(supported_swizzle.r, supported_swizzle.b);
|
||||
}
|
||||
return supported_swizzle;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -33,10 +33,6 @@ public:
|
||||
[[nodiscard]] vk::Format GetSupportedFormat(vk::Format format,
|
||||
vk::FormatFeatureFlags2 flags) const;
|
||||
|
||||
/// Re-orders a component swizzle for format compatibility, if needed.
|
||||
[[nodiscard]] vk::ComponentMapping GetSupportedComponentSwizzle(
|
||||
vk::Format format, vk::ComponentMapping swizzle, vk::FormatFeatureFlags2 flags) const;
|
||||
|
||||
/// Returns the Vulkan instance
|
||||
vk::Instance GetInstance() const {
|
||||
return *instance;
|
||||
@ -158,6 +154,11 @@ public:
|
||||
return legacy_vertex_attributes;
|
||||
}
|
||||
|
||||
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
||||
bool IsImageLoadStoreLodSupported() const {
|
||||
return image_load_store_lod;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
@ -327,6 +328,7 @@ private:
|
||||
bool maintenance5{};
|
||||
bool list_restart{};
|
||||
bool legacy_vertex_attributes{};
|
||||
bool image_load_store_lod{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
|
@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::LogicalStage;
|
||||
using Shader::Stage;
|
||||
using Shader::VsOutput;
|
||||
|
||||
constexpr static std::array DescriptorHeapSizes = {
|
||||
@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||
auto info = Shader::RuntimeInfo{stage};
|
||||
const auto& regs = liverpool->regs;
|
||||
const auto BuildCommon = [&](const auto& program) {
|
||||
@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
||||
};
|
||||
switch (stage) {
|
||||
case Shader::Stage::Export: {
|
||||
case Stage::Local: {
|
||||
BuildCommon(regs.ls_program);
|
||||
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
|
||||
info.ls_info.links_with_tcs = true;
|
||||
Shader::TessellationDataConstantBuffer tess_constants;
|
||||
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
|
||||
const auto params = Liverpool::GetParams(*pgm);
|
||||
const auto& hull_info = program_cache.at(params.hash)->info;
|
||||
hull_info.ReadTessConstantBuffer(tess_constants);
|
||||
info.ls_info.ls_stride = tess_constants.ls_stride;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Hull: {
|
||||
BuildCommon(regs.hs_program);
|
||||
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
||||
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
|
||||
info.hs_info.tess_type = regs.tess_config.type;
|
||||
|
||||
// We need to initialize most hs_info fields after finding the V# with tess constants
|
||||
break;
|
||||
}
|
||||
case Stage::Export: {
|
||||
BuildCommon(regs.es_program);
|
||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Vertex: {
|
||||
case Stage::Vertex: {
|
||||
BuildCommon(regs.vs_program);
|
||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||
info.vs_info.emulate_depth_negative_one_to_one =
|
||||
!instance.IsDepthClipControlSupported() &&
|
||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||
if (l_stage == LogicalStage::TessellationEval) {
|
||||
info.vs_info.tess_type = regs.tess_config.type;
|
||||
info.vs_info.tess_topology = regs.tess_config.topology;
|
||||
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Geometry: {
|
||||
case Stage::Geometry: {
|
||||
BuildCommon(regs.gs_program);
|
||||
auto& gs_info = info.gs_info;
|
||||
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||
@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
case Stage::Fragment: {
|
||||
BuildCommon(regs.ps_program);
|
||||
info.fs_info.en_flags = regs.ps_input_ena;
|
||||
info.fs_info.addr_flags = regs.ps_input_addr;
|
||||
@ -143,10 +172,10 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
case Stage::Compute: {
|
||||
const auto& cs_pgm = liverpool->GetCsRegs();
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
|
||||
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;
|
||||
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
@ -172,6 +201,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||
};
|
||||
@ -276,6 +306,11 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
||||
key.vertex_buffer_formats.fill(vk::Format::eUndefined);
|
||||
|
||||
key.patch_control_points = 0;
|
||||
if (regs.stage_enable.hs_en.Value()) {
|
||||
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
||||
}
|
||||
|
||||
// First pass of bindings check to identify formats and swizzles and pass them to the shader
|
||||
// recompiler.
|
||||
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
@ -304,7 +339,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
fetch_shader = std::nullopt;
|
||||
|
||||
Shader::Backend::Bindings binding{};
|
||||
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
|
||||
const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
|
||||
const auto stage_in_idx = static_cast<u32>(stage_in);
|
||||
const auto stage_out_idx = static_cast<u32>(stage_out);
|
||||
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
|
||||
@ -331,23 +366,23 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
auto params = Liverpool::GetParams(*pgm);
|
||||
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
|
||||
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
|
||||
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
|
||||
key.stage_hashes[stage_out_idx]) =
|
||||
GetProgram(stage_in, stage_out, params, binding);
|
||||
if (fetch_shader_) {
|
||||
fetch_shader = fetch_shader_;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
|
||||
|
||||
const auto& IsGsFeaturesSupported = [&]() -> bool {
|
||||
// These checks are temporary until all functionality is implemented.
|
||||
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
|
||||
};
|
||||
|
||||
TryBindStage(Shader::Stage::Fragment);
|
||||
infos.fill(nullptr);
|
||||
TryBindStage(Stage::Fragment, LogicalStage::Fragment);
|
||||
|
||||
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
|
||||
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
|
||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||
|
||||
switch (regs.stage_enable.raw) {
|
||||
@ -355,22 +390,36 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
|
||||
if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Shader::Stage::Geometry)) {
|
||||
if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
|
||||
if (!instance.IsTessellationSupported()) {
|
||||
break;
|
||||
}
|
||||
if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
TryBindStage(Shader::Stage::Vertex);
|
||||
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
|
||||
TryBindStage(Stage::Vertex, LogicalStage::Vertex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
|
||||
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
u32 vertex_binding = 0;
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
@ -423,19 +472,18 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.num_samples = num_samples;
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace Vulkan
|
||||
|
||||
bool PipelineCache::RefreshComputeKey() {
|
||||
Shader::Backend::Bindings binding{};
|
||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
||||
const auto& cs_pgm = liverpool->GetCsRegs();
|
||||
const auto cs_params = Liverpool::GetParams(cs_pgm);
|
||||
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
|
||||
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
||||
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
|
||||
return true;
|
||||
}
|
||||
|
||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
||||
const Shader::RuntimeInfo& runtime_info,
|
||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx,
|
||||
Shader::Backend::Bindings& binding) {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||
@ -460,19 +508,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
||||
const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||
if (Config::collectShadersForDebug()) {
|
||||
DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span<const u32>{},
|
||||
is_patched);
|
||||
DebugState.CollectShader(name, info.l_stage, module, spv, code,
|
||||
patch ? *patch : std::span<const u32>{}, is_patched);
|
||||
}
|
||||
return module;
|
||||
}
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
|
||||
PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding) {
|
||||
const auto runtime_info = BuildRuntimeInfo(stage);
|
||||
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
|
||||
Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding) {
|
||||
auto runtime_info = BuildRuntimeInfo(stage, l_stage);
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
||||
if (new_program) {
|
||||
it_pgm.value() = std::make_unique<Program>(stage, params);
|
||||
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
|
||||
auto& program = it_pgm.value();
|
||||
auto start = binding;
|
||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||
@ -481,6 +529,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
|
||||
HashCombine(params.hash, 0));
|
||||
}
|
||||
it_pgm.value()->info.user_data = params.user_data;
|
||||
|
||||
auto& program = it_pgm.value();
|
||||
auto& info = program->info;
|
||||
@ -491,7 +540,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
|
||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||
if (it == program->modules.end()) {
|
||||
auto new_info = Shader::Info(stage, params);
|
||||
auto new_info = Shader::Info(stage, l_stage, params);
|
||||
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
||||
program->AddPermut(module, std::move(spec));
|
||||
} else {
|
||||
|
@ -34,11 +34,13 @@ struct Program {
|
||||
vk::ShaderModule module;
|
||||
Shader::StageSpecialization spec;
|
||||
};
|
||||
using ModuleList = boost::container::small_vector<Module, 8>;
|
||||
|
||||
Shader::Info info;
|
||||
boost::container::small_vector<Module, 8> modules;
|
||||
ModuleList modules;
|
||||
|
||||
explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
|
||||
explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
|
||||
: info{stage, l_stage, params} {}
|
||||
|
||||
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
|
||||
modules.emplace_back(module, std::move(spec));
|
||||
@ -55,10 +57,10 @@ public:
|
||||
|
||||
const ComputePipeline* GetComputePipeline();
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
|
||||
u64>
|
||||
GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding);
|
||||
using Result = std::tuple<const Shader::Info*, vk::ShaderModule,
|
||||
std::optional<Shader::Gcn::FetchShaderData>, u64>;
|
||||
Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage,
|
||||
Shader::ShaderParams params, Shader::Backend::Bindings& binding);
|
||||
|
||||
std::optional<vk::ShaderModule> ReplaceShader(vk::ShaderModule module,
|
||||
std::span<const u32> spv_code);
|
||||
@ -71,10 +73,10 @@ private:
|
||||
std::string_view ext);
|
||||
std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,
|
||||
std::string_view ext);
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx,
|
||||
Shader::Backend::Bindings& binding);
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage);
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
@ -14,9 +14,10 @@ class BufferCache;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
|
||||
vk::ShaderStageFlagBits::eGeometry |
|
||||
vk::ShaderStageFlagBits::eFragment;
|
||||
static constexpr auto gp_stage_flags =
|
||||
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl |
|
||||
vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry |
|
||||
vk::ShaderStageFlagBits::eFragment;
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
@ -37,6 +38,7 @@ public:
|
||||
}
|
||||
|
||||
auto GetStages() const {
|
||||
static_assert(static_cast<u32>(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1);
|
||||
if (is_compute) {
|
||||
return std::span{stages.cend() - 1, stages.cend()};
|
||||
} else {
|
||||
@ -44,7 +46,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
|
||||
const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept {
|
||||
return *stages[u32(stage)];
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
#include <fmt/ranges.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
@ -21,15 +22,6 @@
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
|
||||
#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL
|
||||
static vk::detail::DynamicLoader dl;
|
||||
#else
|
||||
extern "C" {
|
||||
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
|
||||
const char* pName);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static const char* const VALIDATION_LAYER_NAME = "VK_LAYER_KHRONOS_validation";
|
||||
@ -199,15 +191,57 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
|
||||
return extensions;
|
||||
}
|
||||
|
||||
std::vector<const char*> GetInstanceLayers(bool enable_validation, bool enable_crash_diagnostic) {
|
||||
const auto [properties_result, properties] = vk::enumerateInstanceLayerProperties();
|
||||
if (properties_result != vk::Result::eSuccess || properties.empty()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to query layer properties: {}",
|
||||
vk::to_string(properties_result));
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<const char*> layers;
|
||||
layers.reserve(2);
|
||||
|
||||
if (enable_validation) {
|
||||
layers.push_back(VALIDATION_LAYER_NAME);
|
||||
}
|
||||
if (enable_crash_diagnostic) {
|
||||
layers.push_back(CRASH_DIAGNOSTIC_LAYER_NAME);
|
||||
}
|
||||
|
||||
// Sanitize layer list
|
||||
std::erase_if(layers, [&](const char* layer) -> bool {
|
||||
const auto it = std::ranges::find_if(properties, [layer](const auto& prop) {
|
||||
return std::strcmp(layer, prop.layerName) == 0;
|
||||
});
|
||||
if (it == properties.end()) {
|
||||
LOG_ERROR(Render_Vulkan, "Requested layer {} is not available", layer);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
return layers;
|
||||
}
|
||||
|
||||
vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool enable_validation,
|
||||
bool enable_crash_diagnostic) {
|
||||
LOG_INFO(Render_Vulkan, "Creating vulkan instance");
|
||||
|
||||
#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL
|
||||
auto vkGetInstanceProcAddr =
|
||||
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
|
||||
#ifdef __APPLE__
|
||||
// If the Vulkan loader exists in /usr/local/lib, give it priority. The Vulkan SDK
|
||||
// installs it here by default but it is not in the default library search path.
|
||||
// The loader has a clause to check for it, but at a lower priority than the bundled
|
||||
// libMoltenVK.dylib, so we need to handle it ourselves to give it priority.
|
||||
static const std::string usr_local_path = "/usr/local/lib/libvulkan.dylib";
|
||||
static vk::detail::DynamicLoader dl = std::filesystem::exists(usr_local_path)
|
||||
? vk::detail::DynamicLoader(usr_local_path)
|
||||
: vk::detail::DynamicLoader();
|
||||
#else
|
||||
static vk::detail::DynamicLoader dl;
|
||||
#endif
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(
|
||||
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr"));
|
||||
|
||||
const auto [available_version_result, available_version] =
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.vkEnumerateInstanceVersion
|
||||
@ -230,38 +264,25 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e
|
||||
.apiVersion = available_version,
|
||||
};
|
||||
|
||||
u32 num_layers = 0;
|
||||
std::array<const char*, 2> layers;
|
||||
const auto layers = GetInstanceLayers(enable_validation, enable_crash_diagnostic);
|
||||
|
||||
vk::Bool32 enable_force_barriers = vk::False;
|
||||
const char* log_path{};
|
||||
const std::string extensions_string = fmt::format("{}", fmt::join(extensions, ", "));
|
||||
const std::string layers_string = fmt::format("{}", fmt::join(layers, ", "));
|
||||
LOG_INFO(Render_Vulkan, "Enabled instance extensions: {}", extensions_string);
|
||||
LOG_INFO(Render_Vulkan, "Enabled instance layers: {}", layers_string);
|
||||
|
||||
#if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL
|
||||
if (enable_validation) {
|
||||
layers[num_layers++] = VALIDATION_LAYER_NAME;
|
||||
}
|
||||
// Validation settings
|
||||
vk::Bool32 enable_sync = Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
vk::Bool32 enable_gpuav = Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
const char* gpuav_mode =
|
||||
Config::vkValidationGpuEnabled() ? "GPU_BASED_GPU_ASSISTED" : "GPU_BASED_NONE";
|
||||
|
||||
if (enable_crash_diagnostic) {
|
||||
layers[num_layers++] = CRASH_DIAGNOSTIC_LAYER_NAME;
|
||||
static const auto crash_diagnostic_path =
|
||||
Common::FS::GetUserPathString(Common::FS::PathType::LogDir);
|
||||
log_path = crash_diagnostic_path.c_str();
|
||||
enable_force_barriers = vk::True;
|
||||
}
|
||||
#else
|
||||
if (enable_validation || enable_crash_diagnostic) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Skipping loading Vulkan layers as dynamic loading is not enabled.");
|
||||
}
|
||||
#endif
|
||||
// Crash diagnostics settings
|
||||
static const auto crash_diagnostic_path =
|
||||
Common::FS::GetUserPathString(Common::FS::PathType::LogDir);
|
||||
const char* log_path = crash_diagnostic_path.c_str();
|
||||
vk::Bool32 enable_force_barriers = vk::True;
|
||||
|
||||
vk::Bool32 enable_sync =
|
||||
enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
vk::Bool32 enable_gpuav =
|
||||
enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled()
|
||||
? "GPU_BASED_GPU_ASSISTED"
|
||||
: "GPU_BASED_NONE";
|
||||
const std::array layer_setings = {
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
@ -331,7 +352,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e
|
||||
vk::StructureChain<vk::InstanceCreateInfo, vk::LayerSettingsCreateInfoEXT> instance_ci_chain = {
|
||||
vk::InstanceCreateInfo{
|
||||
.pApplicationInfo = &application_info,
|
||||
.enabledLayerCount = num_layers,
|
||||
.enabledLayerCount = static_cast<u32>(layers.size()),
|
||||
.ppEnabledLayerNames = layers.data(),
|
||||
.enabledExtensionCount = static_cast<u32>(extensions.size()),
|
||||
.ppEnabledExtensionNames = extensions.data(),
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "core/memory.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
@ -48,10 +49,6 @@ void Rasterizer::CpSync() {
|
||||
|
||||
bool Rasterizer::FilterDraw() {
|
||||
const auto& regs = liverpool->regs;
|
||||
// Tessellation is unsupported so skip the draw to avoid locking up the driver.
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
|
||||
return false;
|
||||
}
|
||||
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
|
||||
// actual draw hence can skip pipeline creation.
|
||||
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
|
||||
@ -214,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
|
||||
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
|
||||
@ -271,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
|
||||
buffer_cache.BindIndexBuffer(is_indexed, 0);
|
||||
@ -320,14 +317,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
void Rasterizer::DispatchDirect() {
|
||||
RENDERER_TRACE;
|
||||
|
||||
const auto& cs_program = liverpool->regs.cs_program;
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& cs = pipeline->GetStage(Shader::Stage::Compute);
|
||||
if (ExecuteShaderHLE(cs, liverpool->regs, *this)) {
|
||||
const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -347,7 +344,7 @@ void Rasterizer::DispatchDirect() {
|
||||
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
|
||||
RENDERER_TRACE;
|
||||
|
||||
const auto& cs_program = liverpool->regs.cs_program;
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||
if (!pipeline) {
|
||||
return;
|
||||
@ -387,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
const auto& regs = liverpool->regs;
|
||||
|
||||
if (pipeline->IsCompute()) {
|
||||
const auto& info = pipeline->GetStage(Shader::Stage::Compute);
|
||||
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
|
@ -2,17 +2,19 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_hle.h"
|
||||
|
||||
#include "vk_rasterizer.h"
|
||||
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
|
||||
|
||||
bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
|
||||
Rasterizer& rasterizer) {
|
||||
static bool ExecuteCopyShaderHLE(const Shader::Info& info,
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program,
|
||||
Rasterizer& rasterizer) {
|
||||
auto& scheduler = rasterizer.GetScheduler();
|
||||
auto& buffer_cache = rasterizer.GetBufferCache();
|
||||
|
||||
@ -34,9 +36,9 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||
|
||||
static std::vector<vk::BufferCopy> copies;
|
||||
copies.clear();
|
||||
copies.reserve(regs.cs_program.dim_x);
|
||||
copies.reserve(cs_program.dim_x);
|
||||
|
||||
for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
|
||||
for (u32 i = 0; i < cs_program.dim_x; i++) {
|
||||
const auto& [dst_idx, src_idx, end] = ctl_buf[i];
|
||||
const u32 local_dst_offset = dst_idx * buf_stride;
|
||||
const u32 local_src_offset = src_idx * buf_stride;
|
||||
@ -122,10 +124,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||
}
|
||||
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
|
||||
Rasterizer& rasterizer) {
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
|
||||
switch (info.pgm_hash) {
|
||||
case COPY_SHADER_HASH:
|
||||
return ExecuteCopyShaderHLE(info, regs, rasterizer);
|
||||
return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -15,6 +15,6 @@ class Rasterizer;
|
||||
|
||||
/// Attempts to execute a shader using HLE if possible.
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
|
||||
Rasterizer& rasterizer);
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -131,7 +131,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||
format = image.info.pixel_format;
|
||||
aspect = vk::ImageAspectFlagBits::eDepth;
|
||||
}
|
||||
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) {
|
||||
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil &&
|
||||
(format == vk::Format::eR8Uint || format == vk::Format::eR8Unorm)) {
|
||||
format = image.info.pixel_format;
|
||||
aspect = vk::ImageAspectFlagBits::eStencil;
|
||||
}
|
||||
@ -141,8 +142,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||
.image = image.image,
|
||||
.viewType = info.type,
|
||||
.format = instance.GetSupportedFormat(format, image.format_features),
|
||||
.components =
|
||||
instance.GetSupportedComponentSwizzle(format, info.mapping, image.format_features),
|
||||
.components = info.mapping,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect,
|
||||
.baseMipLevel = info.range.base.level,
|
||||
|
@ -172,6 +172,7 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_
|
||||
|
||||
vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||
switch (format) {
|
||||
case vk::Format::eR8Uint:
|
||||
case vk::Format::eR8Unorm:
|
||||
return vk::Format::eR8Uint;
|
||||
case vk::Format::eR4G4B4A4UnormPack16:
|
||||
@ -212,6 +213,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
case vk::Format::eBc7UnormBlock:
|
||||
case vk::Format::eBc6HUfloatBlock:
|
||||
case vk::Format::eR32G32B32A32Uint:
|
||||
case vk::Format::eR32G32B32A32Sfloat:
|
||||
return vk::Format::eR32G32B32A32Uint;
|
||||
default:
|
||||
|
Loading…
Reference in New Issue
Block a user