Merge branch 'shadps4-emu:main' into allocate-fixes

This commit is contained in:
Stephen Miller 2025-01-11 11:23:28 -06:00 committed by GitHub
commit 6bd4c6b02f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
105 changed files with 21471 additions and 19128 deletions

View File

@ -14,14 +14,14 @@ env:
jobs: jobs:
reuse: reuse:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: fsfe/reuse-action@v5 - uses: fsfe/reuse-action@v5
clang-format: clang-format:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -39,7 +39,7 @@ jobs:
run: ./.ci/clang-format.sh run: ./.ci/clang-format.sh
get-info: get-info:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
outputs: outputs:
date: ${{ steps.vars.outputs.date }} date: ${{ steps.vars.outputs.date }}
shorthash: ${{ steps.vars.outputs.shorthash }} shorthash: ${{ steps.vars.outputs.shorthash }}
@ -57,7 +57,7 @@ jobs:
echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
windows-sdl: windows-sdl:
runs-on: windows-latest runs-on: windows-2025
needs: get-info needs: get-info
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -101,7 +101,7 @@ jobs:
path: ${{github.workspace}}/build/shadPS4.exe path: ${{github.workspace}}/build/shadPS4.exe
windows-qt: windows-qt:
runs-on: windows-latest runs-on: windows-2025
needs: get-info needs: get-info
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -376,6 +376,78 @@ jobs:
name: shadps4-linux-qt-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} name: shadps4-linux-qt-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: Shadps4-qt.AppImage path: Shadps4-qt.AppImage
# Build-only job: compiles the SDL frontend with GCC 14 on Ubuntu 24.04.
# No artifact is uploaded by this job and `pre-release` does not list it in `needs`.
linux-sdl-gcc:
runs-on: ubuntu-24.04
needs: get-info
steps:
# Submodules are fetched recursively.
- uses: actions/checkout@v4
with:
submodules: recursive
# Installs gcc-14 together with the X11/Wayland/GL/audio/udev development packages.
- name: Install dependencies
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev
# Caches the build directory, keyed on a hash of every CMakeLists.txt and the cmake/ folder.
# NOTE(review): cache-name carries no compiler tag; if another Linux SDL job uses the same
# prefix the two would share cache entries - confirm against the rest of the workflow.
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
# Compiler-output cache; the key has no timestamp suffix (append-timestamp: false).
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
# Configures with gcc-14/g++-14 and routes both compilers through ccache.
# `--fresh` asks CMake to configure from a clean cache even though the build directory was restored above.
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
# Parallel build using one job per available processor.
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
# Build-only job: compiles the Qt frontend with GCC 14 on Ubuntu 24.04.
# No artifact is uploaded by this job and `pre-release` does not list it in `needs`.
linux-qt-gcc:
runs-on: ubuntu-24.04
needs: get-info
steps:
# Submodules are fetched recursively.
- uses: actions/checkout@v4
with:
submodules: recursive
# Same package set as the SDL GCC job plus the Qt 6 base/tools/multimedia development packages.
- name: Install dependencies
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev
# Caches the build directory, keyed on a hash of every CMakeLists.txt and the cmake/ folder.
# NOTE(review): cache-name carries no compiler tag; if another Linux Qt job uses the same
# prefix the two would share cache entries - confirm against the rest of the workflow.
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
# Compiler-output cache; the key has no timestamp suffix (append-timestamp: false).
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
# Configures with gcc-14/g++-14, enables the Qt GUI and the updater, and routes both compilers through ccache.
# `--fresh` asks CMake to configure from a clean cache even though the build directory was restored above.
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
# Parallel build using one job per available processor.
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
pre-release: pre-release:
if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push' if: github.ref == 'refs/heads/main' && github.repository == 'shadps4-emu/shadPS4' && github.event_name == 'push'
needs: [get-info, windows-sdl, windows-qt, macos-sdl, macos-qt, linux-sdl, linux-qt] needs: [get-info, windows-sdl, windows-qt, macos-sdl, macos-qt, linux-sdl, linux-qt]

View File

@ -336,6 +336,8 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp
src/core/libraries/share_play/shareplay.h src/core/libraries/share_play/shareplay.h
src/core/libraries/razor_cpu/razor_cpu.cpp src/core/libraries/razor_cpu/razor_cpu.cpp
src/core/libraries/razor_cpu/razor_cpu.h src/core/libraries/razor_cpu/razor_cpu.h
src/core/libraries/mouse/mouse.cpp
src/core/libraries/mouse/mouse.h
) )
set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h
@ -413,7 +415,9 @@ set(VDEC_LIB src/core/libraries/videodec/videodec2_impl.cpp
src/core/libraries/videodec/videodec_impl.h src/core/libraries/videodec/videodec_impl.h
) )
set(NP_LIBS src/core/libraries/np_manager/np_manager.cpp set(NP_LIBS src/core/libraries/np_common/np_common.cpp
src/core/libraries/np_common/np_common.h
src/core/libraries/np_manager/np_manager.cpp
src/core/libraries/np_manager/np_manager.h src/core/libraries/np_manager/np_manager.h
src/core/libraries/np_score/np_score.cpp src/core/libraries/np_score/np_score.cpp
src/core/libraries/np_score/np_score.h src/core/libraries/np_score/np_score.h
@ -1040,7 +1044,6 @@ install(TARGETS shadps4 BUNDLE DESTINATION .)
if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
install(FILES "dist/net.shadps4.shadPS4.desktop" DESTINATION "share/applications") install(FILES "dist/net.shadps4.shadPS4.desktop" DESTINATION "share/applications")
install(FILES "dist/net.shadps4.shadPS4.releases.xml" DESTINATION "share/metainfo/releases")
install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo") install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo")
install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png") install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png")
install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps") install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps")

View File

@ -11,7 +11,6 @@ path = [
"dist/net.shadps4.shadPS4.desktop", "dist/net.shadps4.shadPS4.desktop",
"dist/net.shadps4.shadPS4_metadata.pot", "dist/net.shadps4.shadPS4_metadata.pot",
"dist/net.shadps4.shadPS4.metainfo.xml", "dist/net.shadps4.shadPS4.metainfo.xml",
"dist/net.shadps4.shadPS4.releases.xml",
"documents/changelog.md", "documents/changelog.md",
"documents/Quickstart/2.png", "documents/Quickstart/2.png",
"documents/Screenshots/*", "documents/Screenshots/*",

View File

@ -36,9 +36,30 @@
<categories> <categories>
<category translate="no">Game</category> <category translate="no">Game</category>
</categories> </categories>
<releases type="external" url="https://cdn.jsdelivr.net/gh/fpiesche/flatpak-builds/apps/net.shadps4.shadPS4/net.shadps4.shadPS4.releases.xml"> <releases>
<release version="v.0.4.0" date="2024-11-03"> <release version="0.5.0" date="2024-12-25">
<description></description> <url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.5.0</url>
</release>
<release version="0.4.0" date="2024-10-31">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.4.0</url>
</release>
<release version="0.3.0" date="2024-09-23">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.3.0</url>
</release>
<release version="0.2.0" date="2024-08-15">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.2.0</url>
</release>
<release version="0.1.0" date="2024-07-01">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/0.1.0</url>
</release>
<release version="0.0.3" date="2024-03-23">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.3</url>
</release>
<release version="0.0.2" date="2023-10-21">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.2</url>
</release>
<release version="0.0.1" date="2024-09-29">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.1</url>
</release> </release>
</releases> </releases>
<content_rating type="oars-1.1"/> <content_rating type="oars-1.1"/>

View File

@ -1,23 +0,0 @@
<!-- Release history, newest first; each entry links to the matching GitHub release tag. -->
<releases>
<release version="0.4.0" date="2024-10-31">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.4.0</url>
</release>
<release version="0.3.0" date="2024-09-23">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.3.0</url>
</release>
<release version="0.2.0" date="2024-08-15">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.2.0</url>
</release>
<!-- Tag naming is not uniform across releases ("v.0.2.0", "0.1.0", "v0.0.3"); the URLs below follow the tags as written. -->
<release version="0.1.0" date="2024-07-01">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/0.1.0</url>
</release>
<release version="0.0.3" date="2024-03-23">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.3</url>
</release>
<release version="0.0.2" date="2023-10-21">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.2</url>
</release>
<!-- NOTE(review): 0.0.1 is dated later than 0.0.2 through 0.3.0 above; the year looks wrong - confirm against the actual tag date. -->
<release version="0.0.1" date="2024-09-29">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v0.0.1</url>
</release>
</releases>

View File

@ -213,9 +213,7 @@ endif()
# Discord RPC # Discord RPC
if (ENABLE_DISCORD_RPC) if (ENABLE_DISCORD_RPC)
set(BUILD_EXAMPLES OFF)
add_subdirectory(discord-rpc) add_subdirectory(discord-rpc)
target_include_directories(discord-rpc INTERFACE discord-rpc/include)
endif() endif()
# GCN Headers # GCN Headers

@ -1 +1 @@
Subproject commit 4ec218155d73bcb8022f8f7ca72305d801f84beb Subproject commit 51b09d426a4a1bcfa6ee6d4894e57d669f4a2e65

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35 Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32

View File

@ -33,6 +33,7 @@ namespace Config {
static bool isNeo = false; static bool isNeo = false;
static bool isFullscreen = false; static bool isFullscreen = false;
static std::string fullscreenMode = "borderless";
static bool playBGM = false; static bool playBGM = false;
static bool isTrophyPopupDisabled = false; static bool isTrophyPopupDisabled = false;
static int BGMvolume = 50; static int BGMvolume = 50;
@ -47,6 +48,7 @@ static std::string updateChannel;
static std::string backButtonBehavior = "left"; static std::string backButtonBehavior = "left";
static bool useSpecialPad = false; static bool useSpecialPad = false;
static int specialPadClass = 1; static int specialPadClass = 1;
static bool isMotionControlsEnabled = true;
static bool isDebugDump = false; static bool isDebugDump = false;
static bool isShaderDebug = false; static bool isShaderDebug = false;
static bool isShowSplash = false; static bool isShowSplash = false;
@ -104,10 +106,14 @@ bool isNeoModeConsole() {
return isNeo; return isNeo;
} }
bool isFullscreenMode() { bool getIsFullscreen() {
return isFullscreen; return isFullscreen;
} }
std::string getFullscreenMode() {
return fullscreenMode;
}
bool getisTrophyPopupDisabled() { bool getisTrophyPopupDisabled() {
return isTrophyPopupDisabled; return isTrophyPopupDisabled;
} }
@ -172,6 +178,10 @@ int getSpecialPadClass() {
return specialPadClass; return specialPadClass;
} }
/// Reports whether motion controls are currently enabled in the configuration.
bool getIsMotionControlsEnabled() {
    const bool enabled = isMotionControlsEnabled;
    return enabled;
}
bool debugDump() { bool debugDump() {
return isDebugDump; return isDebugDump;
} }
@ -304,10 +314,14 @@ void setVblankDiv(u32 value) {
vblankDivider = value; vblankDivider = value;
} }
void setFullscreenMode(bool enable) { void setIsFullscreen(bool enable) {
isFullscreen = enable; isFullscreen = enable;
} }
/// Stores the fullscreen mode string in the configuration.
/// @param mode New mode value; taken by value, so its buffer can be adopted directly.
void setFullscreenMode(std::string mode) {
    // `mode` is already a private copy owned by this call: swap it into place instead of
    // copying it a second time. The previous value is released when `mode` goes out of scope.
    fullscreenMode.swap(mode);
}
void setisTrophyPopupDisabled(bool disable) { void setisTrophyPopupDisabled(bool disable) {
isTrophyPopupDisabled = disable; isTrophyPopupDisabled = disable;
} }
@ -368,6 +382,10 @@ void setSpecialPadClass(int type) {
specialPadClass = type; specialPadClass = type;
} }
/// Enables or disables motion controls in the configuration.
/// @param use True to enable motion controls, false to disable them.
void setIsMotionControlsEnabled(const bool use) {
    isMotionControlsEnabled = use;
}
void setSeparateUpdateEnabled(bool use) { void setSeparateUpdateEnabled(bool use) {
separateupdatefolder = use; separateupdatefolder = use;
} }
@ -566,6 +584,7 @@ void load(const std::filesystem::path& path) {
isNeo = toml::find_or<bool>(general, "isPS4Pro", false); isNeo = toml::find_or<bool>(general, "isPS4Pro", false);
isFullscreen = toml::find_or<bool>(general, "Fullscreen", false); isFullscreen = toml::find_or<bool>(general, "Fullscreen", false);
fullscreenMode = toml::find_or<std::string>(general, "FullscreenMode", "borderless");
playBGM = toml::find_or<bool>(general, "playBGM", false); playBGM = toml::find_or<bool>(general, "playBGM", false);
isTrophyPopupDisabled = toml::find_or<bool>(general, "isTrophyPopupDisabled", false); isTrophyPopupDisabled = toml::find_or<bool>(general, "isTrophyPopupDisabled", false);
BGMvolume = toml::find_or<int>(general, "BGMvolume", 50); BGMvolume = toml::find_or<int>(general, "BGMvolume", 50);
@ -594,6 +613,7 @@ void load(const std::filesystem::path& path) {
backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left"); backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left");
useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false); useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false);
specialPadClass = toml::find_or<int>(input, "specialPadClass", 1); specialPadClass = toml::find_or<int>(input, "specialPadClass", 1);
isMotionControlsEnabled = toml::find_or<bool>(input, "isMotionControlsEnabled", true);
} }
if (data.contains("GPU")) { if (data.contains("GPU")) {
@ -691,6 +711,7 @@ void save(const std::filesystem::path& path) {
data["General"]["isPS4Pro"] = isNeo; data["General"]["isPS4Pro"] = isNeo;
data["General"]["Fullscreen"] = isFullscreen; data["General"]["Fullscreen"] = isFullscreen;
data["General"]["FullscreenMode"] = fullscreenMode;
data["General"]["isTrophyPopupDisabled"] = isTrophyPopupDisabled; data["General"]["isTrophyPopupDisabled"] = isTrophyPopupDisabled;
data["General"]["playBGM"] = playBGM; data["General"]["playBGM"] = playBGM;
data["General"]["BGMvolume"] = BGMvolume; data["General"]["BGMvolume"] = BGMvolume;
@ -709,6 +730,7 @@ void save(const std::filesystem::path& path) {
data["Input"]["backButtonBehavior"] = backButtonBehavior; data["Input"]["backButtonBehavior"] = backButtonBehavior;
data["Input"]["useSpecialPad"] = useSpecialPad; data["Input"]["useSpecialPad"] = useSpecialPad;
data["Input"]["specialPadClass"] = specialPadClass; data["Input"]["specialPadClass"] = specialPadClass;
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenWidth"] = screenWidth;
data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["screenHeight"] = screenHeight;
data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["nullGpu"] = isNullGpu;

View File

@ -17,9 +17,9 @@ void saveMainWindow(const std::filesystem::path& path);
std::string getTrophyKey(); std::string getTrophyKey();
void setTrophyKey(std::string key); void setTrophyKey(std::string key);
bool getIsFullscreen();
std::string getFullscreenMode();
bool isNeoModeConsole(); bool isNeoModeConsole();
bool isFullscreenMode();
bool getPlayBGM(); bool getPlayBGM();
int getBGMvolume(); int getBGMvolume();
bool getisTrophyPopupDisabled(); bool getisTrophyPopupDisabled();
@ -38,6 +38,7 @@ int getCursorHideTimeout();
std::string getBackButtonBehavior(); std::string getBackButtonBehavior();
bool getUseSpecialPad(); bool getUseSpecialPad();
int getSpecialPadClass(); int getSpecialPadClass();
bool getIsMotionControlsEnabled();
u32 getScreenWidth(); u32 getScreenWidth();
u32 getScreenHeight(); u32 getScreenHeight();
@ -65,7 +66,8 @@ void setVblankDiv(u32 value);
void setGpuId(s32 selectedGpuId); void setGpuId(s32 selectedGpuId);
void setScreenWidth(u32 width); void setScreenWidth(u32 width);
void setScreenHeight(u32 height); void setScreenHeight(u32 height);
void setFullscreenMode(bool enable); void setIsFullscreen(bool enable);
void setFullscreenMode(std::string mode);
void setisTrophyPopupDisabled(bool disable); void setisTrophyPopupDisabled(bool disable);
void setPlayBGM(bool enable); void setPlayBGM(bool enable);
void setBGMvolume(int volume); void setBGMvolume(int volume);
@ -84,6 +86,7 @@ void setCursorHideTimeout(int newcursorHideTimeout);
void setBackButtonBehavior(const std::string& type); void setBackButtonBehavior(const std::string& type);
void setUseSpecialPad(bool use); void setUseSpecialPad(bool use);
void setSpecialPadClass(int type); void setSpecialPadClass(int type);
void setIsMotionControlsEnabled(bool use);
void setLogType(const std::string& type); void setLogType(const std::string& type);
void setLogFilter(const std::string& type); void setLogFilter(const std::string& type);
@ -139,4 +142,4 @@ void setDefaultValues();
// settings // settings
u32 GetLanguage(); u32 GetLanguage();
}; // namespace Config }; // namespace Config

View File

@ -111,7 +111,7 @@ public:
return raw_firmware_ver; return raw_firmware_ver;
} }
[[nodiscard]] const PSFAttributes& PSFAttributes() const { [[nodiscard]] const PSFAttributes& GetPSFAttributes() const {
ASSERT(initialized); ASSERT(initialized);
return psf_attributes; return psf_attributes;
} }

View File

@ -98,6 +98,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Lib, Ssl) \ SUB(Lib, Ssl) \
SUB(Lib, SysModule) \ SUB(Lib, SysModule) \
SUB(Lib, Move) \ SUB(Lib, Move) \
SUB(Lib, NpCommon) \
SUB(Lib, NpManager) \ SUB(Lib, NpManager) \
SUB(Lib, NpScore) \ SUB(Lib, NpScore) \
SUB(Lib, NpTrophy) \ SUB(Lib, NpTrophy) \
@ -126,6 +127,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Lib, Vdec2) \ SUB(Lib, Vdec2) \
SUB(Lib, Videodec) \ SUB(Lib, Videodec) \
SUB(Lib, RazorCpu) \ SUB(Lib, RazorCpu) \
SUB(Lib, Mouse) \
CLS(Frontend) \ CLS(Frontend) \
CLS(Render) \ CLS(Render) \
SUB(Render, Vulkan) \ SUB(Render, Vulkan) \

View File

@ -65,6 +65,7 @@ enum class Class : u8 {
Lib_Ssl, ///< The LibSceSsl implementation. Lib_Ssl, ///< The LibSceSsl implementation.
Lib_Http, ///< The LibSceHttp implementation. Lib_Http, ///< The LibSceHttp implementation.
Lib_SysModule, ///< The LibSceSysModule implementation Lib_SysModule, ///< The LibSceSysModule implementation
Lib_NpCommon, ///< The LibSceNpCommon implementation
Lib_NpManager, ///< The LibSceNpManager implementation Lib_NpManager, ///< The LibSceNpManager implementation
Lib_NpScore, ///< The LibSceNpScore implementation Lib_NpScore, ///< The LibSceNpScore implementation
Lib_NpTrophy, ///< The LibSceNpTrophy implementation Lib_NpTrophy, ///< The LibSceNpTrophy implementation
@ -93,6 +94,7 @@ enum class Class : u8 {
Lib_Vdec2, ///< The LibSceVideodec2 implementation. Lib_Vdec2, ///< The LibSceVideodec2 implementation.
Lib_Videodec, ///< The LibSceVideodec implementation. Lib_Videodec, ///< The LibSceVideodec implementation.
Lib_RazorCpu, ///< The LibRazorCpu implementation. Lib_RazorCpu, ///< The LibRazorCpu implementation.
Lib_Mouse, ///< The LibSceMouse implementation
Frontend, ///< Emulator UI Frontend, ///< Emulator UI
Render, ///< Video Core Render, ///< Video Core
Render_Vulkan, ///< Vulkan backend Render_Vulkan, ///< Vulkan backend

View File

@ -40,7 +40,8 @@ void MntPoints::UnmountAll() {
m_mnt_pairs.clear(); m_mnt_pairs.clear();
} }
std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_read_only) { std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_read_only,
bool force_base_path) {
// Evil games like Turok2 pass double slashes e.g /app0//game.kpf // Evil games like Turok2 pass double slashes e.g /app0//game.kpf
std::string corrected_path(path); std::string corrected_path(path);
size_t pos = corrected_path.find("//"); size_t pos = corrected_path.find("//");
@ -72,7 +73,7 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
patch_path /= rel_path; patch_path /= rel_path;
if ((corrected_path.starts_with("/app0") || corrected_path.starts_with("/hostapp")) && if ((corrected_path.starts_with("/app0") || corrected_path.starts_with("/hostapp")) &&
std::filesystem::exists(patch_path)) { !force_base_path && std::filesystem::exists(patch_path)) {
return patch_path; return patch_path;
} }
@ -132,8 +133,10 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
return std::optional<std::filesystem::path>(current_path); return std::optional<std::filesystem::path>(current_path);
}; };
if (const auto path = search(patch_path)) { if (!force_base_path) {
return *path; if (const auto path = search(patch_path)) {
return *path;
}
} }
if (const auto path = search(host_path)) { if (const auto path = search(host_path)) {
return *path; return *path;
@ -144,6 +147,39 @@ std::filesystem::path MntPoints::GetHostPath(std::string_view path, bool* is_rea
return host_path; return host_path;
} }
// TODO: Does not handle mount points inside mount points.
/// Enumerates the merged contents of a guest directory, overlaying the patch directory on top of
/// the base directory.
///
/// The guest path is resolved twice through GetHostPath: once forcing the base path and once
/// allowing the patch path. Every entry name is reported exactly once; when a name exists in both
/// locations, the patch entry is the one reported.
///
/// @param guest_directory Guest path of the directory to enumerate.
/// @param callback Invoked once per entry with the host path of the entry and a flag that is true
///                 when the entry is not a directory.
void MntPoints::IterateDirectory(std::string_view guest_directory,
                                 const IterateDirectoryCallback& callback) {
    const auto base_path = GetHostPath(guest_directory, nullptr, true);
    const auto patch_path = GetHostPath(guest_directory, nullptr, false);

    // directory_iterator throws when handed something that exists but is not a directory, so
    // both locations are checked with is_directory() rather than exists().
    const bool base_is_directory = std::filesystem::is_directory(base_path);
    // Only need to consider patch path if it is a directory and does not resolve to the same as
    // base.
    const bool apply_patch =
        base_path != patch_path && std::filesystem::is_directory(patch_path);

    // Pass 1: Any files that existed in the base directory, using patch directory if needed.
    if (base_is_directory) {
        for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
            if (apply_patch) {
                const auto patch_entry_path = patch_path / entry.path().filename();
                if (std::filesystem::exists(patch_entry_path)) {
                    callback(patch_entry_path, !std::filesystem::is_directory(patch_entry_path));
                    continue;
                }
            }
            callback(entry.path(), !entry.is_directory());
        }
    }

    // Pass 2: Any files that exist only in the patch directory.
    if (apply_patch) {
        for (const auto& entry : std::filesystem::directory_iterator(patch_path)) {
            const auto base_entry_path = base_path / entry.path().filename();
            if (!std::filesystem::exists(base_entry_path)) {
                callback(entry.path(), !entry.is_directory());
            }
        }
    }
}
int HandleTable::CreateHandle() { int HandleTable::CreateHandle() {
std::scoped_lock lock{m_mutex}; std::scoped_lock lock{m_mutex};

View File

@ -36,7 +36,11 @@ public:
void UnmountAll(); void UnmountAll();
std::filesystem::path GetHostPath(std::string_view guest_directory, std::filesystem::path GetHostPath(std::string_view guest_directory,
bool* is_read_only = nullptr); bool* is_read_only = nullptr, bool force_base_path = false);
using IterateDirectoryCallback =
std::function<void(const std::filesystem::path& host_path, bool is_file)>;
void IterateDirectory(std::string_view guest_directory,
const IterateDirectoryCallback& callback);
const MntPair* GetMountFromHostPath(const std::string& host_path) { const MntPair* GetMountFromHostPath(const std::string& host_path) {
std::scoped_lock lock{m_mutex}; std::scoped_lock lock{m_mutex};

View File

@ -46,17 +46,6 @@ static std::map<std::string, FactoryDevice> available_device = {
namespace Libraries::Kernel { namespace Libraries::Kernel {
auto GetDirectoryEntries(const std::filesystem::path& path) {
std::vector<Core::FileSys::DirEntry> files;
for (const auto& entry : std::filesystem::directory_iterator(path)) {
auto& dir_entry = files.emplace_back();
dir_entry.name = entry.path().filename().string();
dir_entry.isFile = !std::filesystem::is_directory(entry.path().string());
}
return files;
}
int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) { int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) {
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", raw_path, flags, mode); LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", raw_path, flags, mode);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance(); auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
@ -115,7 +104,12 @@ int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) {
if (create) { if (create) {
return handle; // dir already exists return handle; // dir already exists
} else { } else {
file->dirents = GetDirectoryEntries(file->m_host_name); mnt->IterateDirectory(file->m_guest_name,
[&file](const auto& ent_path, const auto ent_is_file) {
auto& dir_entry = file->dirents.emplace_back();
dir_entry.name = ent_path.filename().string();
dir_entry.isFile = ent_is_file;
});
file->dirents_index = 0; file->dirents_index = 0;
} }
} }
@ -695,66 +689,12 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
return sizeof(OrbisKernelDirent); return sizeof(OrbisKernelDirent);
} }
/// Continues a directory listing into the base-game folder once the "-UPDATE" folder's own
/// entries are exhausted.
///
/// The host path of `fd` is searched for the substring "-UPDATE". If it is present, the substring
/// is stripped to obtain the base folder's host path, a directory handle for that folder is
/// reused or created, and GetDents is called on it with the caller's buffer.
///
/// @param fd     Descriptor of the directory being listed.
/// @param buf    Output buffer forwarded to GetDents.
/// @param nbytes Size of `buf`, forwarded to GetDents.
/// @param basep  Forwarded to GetDents unchanged; may be nullptr.
/// @return The GetDents result for the base folder, or 0 when the host path contains no
///         "-UPDATE" or the stripped path is not a directory.
static int HandleSeparateUpdateDents(int fd, char* buf, int nbytes, s64* basep) {
    int dir_entries = 0;

    auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
    auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
    // NOTE(review): `file` and `mount` are dereferenced below without a null check - confirm the
    // callers guarantee a valid fd and a host path that belongs to a mount.
    auto* file = h->GetFile(fd);
    auto update_dir_name = std::string{fmt::UTF(file->m_host_name.u8string()).data};
    auto mount = mnt->GetMountFromHostPath(update_dir_name);
    auto suffix = std::string{fmt::UTF(mount->host_path.u8string()).data};

    size_t pos = update_dir_name.find("-UPDATE");
    if (pos != std::string::npos) {
        // 7 == strlen("-UPDATE"); after this the variable holds the base folder's host path.
        update_dir_name.erase(pos, 7);
        // Rebuild the guest path: mount name + "/" + the host path with the mount's host prefix
        // (and one following character) removed.
        auto guest_name = mount->mount + "/" + update_dir_name.substr(suffix.size() + 1);
        int descriptor;

        // Reuse a handle already registered for the base folder, if any.
        auto existent_folder = h->GetFile(update_dir_name);
        if (!existent_folder) {
            u32 handle = h->CreateHandle();
            auto* new_file = h->GetFile(handle);
            new_file->type = Core::FileSys::FileType::Directory;
            new_file->m_guest_name = guest_name;
            new_file->m_host_name = update_dir_name;
            if (!std::filesystem::is_directory(new_file->m_host_name)) {
                // No base folder on disk: release the handle and report nothing more to list.
                h->DeleteHandle(handle);
                return dir_entries;
            } else {
                new_file->dirents = GetDirectoryEntries(new_file->m_host_name);
                new_file->dirents_index = 0;
            }
            new_file->is_opened = true;
            descriptor = h->GetFileDescriptor(new_file);
        } else {
            descriptor = h->GetFileDescriptor(existent_folder);
        }

        dir_entries = GetDents(descriptor, buf, nbytes, basep);
        // When a reused base folder has nothing left either, rewind both listings to the start.
        if (dir_entries == ORBIS_OK && existent_folder) {
            existent_folder->dirents_index = 0;
            file->dirents_index = 0;
        }
    }

    return dir_entries;
}
int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) { int PS4_SYSV_ABI sceKernelGetdents(int fd, char* buf, int nbytes) {
int a = GetDents(fd, buf, nbytes, nullptr); return GetDents(fd, buf, nbytes, nullptr);
if (a == ORBIS_OK) {
return HandleSeparateUpdateDents(fd, buf, nbytes, nullptr);
}
return a;
} }
int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) { int PS4_SYSV_ABI sceKernelGetdirentries(int fd, char* buf, int nbytes, s64* basep) {
int a = GetDents(fd, buf, nbytes, basep); return GetDents(fd, buf, nbytes, basep);
if (a == ORBIS_OK) {
return HandleSeparateUpdateDents(fd, buf, nbytes, basep);
}
return a;
} }
s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {

View File

@ -15,7 +15,7 @@ namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelIsNeoMode() { int PS4_SYSV_ABI sceKernelIsNeoMode() {
LOG_DEBUG(Kernel_Sce, "called"); LOG_DEBUG(Kernel_Sce, "called");
return Config::isNeoModeConsole() && return Config::isNeoModeConsole() &&
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode; Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode;
} }
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {

View File

@ -18,11 +18,13 @@
#include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libc_internal/libc_internal.h"
#include "core/libraries/libpng/pngdec.h" #include "core/libraries/libpng/pngdec.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/mouse/mouse.h"
#include "core/libraries/move/move.h" #include "core/libraries/move/move.h"
#include "core/libraries/network/http.h" #include "core/libraries/network/http.h"
#include "core/libraries/network/net.h" #include "core/libraries/network/net.h"
#include "core/libraries/network/netctl.h" #include "core/libraries/network/netctl.h"
#include "core/libraries/network/ssl.h" #include "core/libraries/network/ssl.h"
#include "core/libraries/np_common/np_common.h"
#include "core/libraries/np_manager/np_manager.h" #include "core/libraries/np_manager/np_manager.h"
#include "core/libraries/np_score/np_score.h" #include "core/libraries/np_score/np_score.h"
#include "core/libraries/np_trophy/np_trophy.h" #include "core/libraries/np_trophy/np_trophy.h"
@ -71,6 +73,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
Libraries::SysModule::RegisterlibSceSysmodule(sym); Libraries::SysModule::RegisterlibSceSysmodule(sym);
Libraries::Posix::Registerlibsceposix(sym); Libraries::Posix::Registerlibsceposix(sym);
Libraries::AudioIn::RegisterlibSceAudioIn(sym); Libraries::AudioIn::RegisterlibSceAudioIn(sym);
Libraries::NpCommon::RegisterlibSceNpCommon(sym);
Libraries::NpManager::RegisterlibSceNpManager(sym); Libraries::NpManager::RegisterlibSceNpManager(sym);
Libraries::NpScore::RegisterlibSceNpScore(sym); Libraries::NpScore::RegisterlibSceNpScore(sym);
Libraries::NpTrophy::RegisterlibSceNpTrophy(sym); Libraries::NpTrophy::RegisterlibSceNpTrophy(sym);
@ -97,6 +100,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
Libraries::Move::RegisterlibSceMove(sym); Libraries::Move::RegisterlibSceMove(sym);
Libraries::Fiber::RegisterlibSceFiber(sym); Libraries::Fiber::RegisterlibSceFiber(sym);
Libraries::JpegEnc::RegisterlibSceJpegEnc(sym); Libraries::JpegEnc::RegisterlibSceJpegEnc(sym);
Libraries::Mouse::RegisterlibSceMouse(sym);
} }
} // namespace Libraries } // namespace Libraries

View File

@ -0,0 +1,99 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// Generated By moduleGenerator
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/libs.h"
#include "mouse.h"
// HLE stubs for libSceMouse. Every entry point below only logs that it was called and returns
// ORBIS_OK; none of them takes arguments or touches any state.
namespace Libraries::Mouse {

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseClose() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseConnectPort() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseDebugGetDeviceId() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseDeviceOpen() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseDisconnectDevice() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseDisconnectPort() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseGetDeviceInfo() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseInit() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseMbusInit() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseOpen() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
/// Unlike the other stubs this one logs at DEBUG level - presumably because games poll it
/// frequently; confirm before changing the level.
int PS4_SYSV_ABI sceMouseRead() {
    LOG_DEBUG(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseSetHandType() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseSetPointerSpeed() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Stub: logs the call and returns ORBIS_OK.
int PS4_SYSV_ABI sceMouseSetProcessPrivilege() {
    LOG_ERROR(Lib_Mouse, "(STUBBED) called");
    return ORBIS_OK;
}

/// Registers every libSceMouse stub with the symbol resolver.
/// @param sym Resolver that receives the registrations.
/// The first LIB_FUNCTION argument is the identifier string each function is registered under
/// (presumably the symbol's NID - confirm against the LIB_FUNCTION definition).
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym) {
    LIB_FUNCTION("cAnT0Rw-IwU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseClose);
    LIB_FUNCTION("Ymyy1HSSJLQ", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseConnectPort);
    LIB_FUNCTION("BRXOoXQtb+k", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDebugGetDeviceId);
    LIB_FUNCTION("WiGKINCZWkc", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDeviceOpen);
    LIB_FUNCTION("eDQTFHbgeTU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectDevice);
    LIB_FUNCTION("jJP1vYMEPd4", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectPort);
    LIB_FUNCTION("QA9Qupz3Zjw", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseGetDeviceInfo);
    LIB_FUNCTION("Qs0wWulgl7U", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseInit);
    LIB_FUNCTION("1FeceR5YhAo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseMbusInit);
    LIB_FUNCTION("RaqxZIf6DvE", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseOpen);
    LIB_FUNCTION("x8qnXqh-tiM", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseRead);
    LIB_FUNCTION("crkFfp-cmFo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetHandType);
    LIB_FUNCTION("ghLUU2Z5Lcg", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetPointerSpeed);
    LIB_FUNCTION("6aANndpS0Wo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetProcessPrivilege);
}

} // namespace Libraries::Mouse

View File

@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
// Forward declaration only: this header uses SymbolsResolver solely through a
// pointer parameter, so the full class definition is not needed here.
namespace Core::Loader {
class SymbolsResolver;
}
// Declarations for the libSceMouse HLE entry points.
// Every entry point is declared with the PS4_SYSV_ABI calling-convention
// macro, takes no parameters and returns an int status code.
namespace Libraries::Mouse {
int PS4_SYSV_ABI sceMouseClose();
int PS4_SYSV_ABI sceMouseConnectPort();
int PS4_SYSV_ABI sceMouseDebugGetDeviceId();
int PS4_SYSV_ABI sceMouseDeviceOpen();
int PS4_SYSV_ABI sceMouseDisconnectDevice();
int PS4_SYSV_ABI sceMouseDisconnectPort();
int PS4_SYSV_ABI sceMouseGetDeviceInfo();
int PS4_SYSV_ABI sceMouseInit();
int PS4_SYSV_ABI sceMouseMbusInit();
int PS4_SYSV_ABI sceMouseOpen();
int PS4_SYSV_ABI sceMouseRead();
int PS4_SYSV_ABI sceMouseSetHandType();
int PS4_SYSV_ABI sceMouseSetPointerSpeed();
int PS4_SYSV_ABI sceMouseSetProcessPrivilege();
// Registers the entry points declared above with the given symbol resolver.
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Mouse

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/libraries/error_codes.h"
// NP error codes. Both literals have the top bit set, so they are negative
// once stored in a (32-bit) int.
// NOTE(review): meanings are inferred from the constant names only -- confirm
// against the callers that return them.
constexpr int ORBIS_NP_ERROR_INVALID_ARGUMENT = 0x80550003;
constexpr int ORBIS_NP_UTIL_ERROR_NOT_MATCH = 0x80550609;

View File

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"

View File

@ -174,10 +174,11 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
// Fixed mapping means the virtual address must exactly match the provided one. // Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) { if (True(flags & MemoryMapFlags::Fixed)) {
const auto& vma = FindVMA(mapped_addr)->second; auto& vma = FindVMA(mapped_addr)->second;
// If the VMA is mapped, unmap the region first. // If the VMA is mapped, unmap the region first.
if (vma.IsMapped()) { if (vma.IsMapped()) {
UnmapMemoryImpl(mapped_addr, size); UnmapMemoryImpl(mapped_addr, size);
vma = FindVMA(mapped_addr)->second;
} }
const size_t remaining_size = vma.base + vma.size - mapped_addr; const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
@ -211,10 +212,11 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
// Fixed mapping means the virtual address must exactly match the provided one. // Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) { if (True(flags & MemoryMapFlags::Fixed)) {
const auto& vma = FindVMA(mapped_addr)->second; auto& vma = FindVMA(mapped_addr)->second;
// If the VMA is mapped, unmap the region first. // If the VMA is mapped, unmap the region first.
if (vma.IsMapped()) { if (vma.IsMapped()) {
UnmapMemoryImpl(mapped_addr, size); UnmapMemoryImpl(mapped_addr, size);
vma = FindVMA(mapped_addr)->second;
} }
const size_t remaining_size = vma.base + vma.size - mapped_addr; const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
@ -396,14 +398,18 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) {
ASSERT_MSG(vma_base.Contains(virtual_addr, size), ASSERT_MSG(vma_base.Contains(virtual_addr, size),
"Existing mapping does not contain requested unmap range"); "Existing mapping does not contain requested unmap range");
const auto type = vma_base.type;
if (type == VMAType::Free) {
return ORBIS_OK;
}
const auto vma_base_addr = vma_base.base; const auto vma_base_addr = vma_base.base;
const auto vma_base_size = vma_base.size; const auto vma_base_size = vma_base.size;
const auto phys_base = vma_base.phys_base; const auto phys_base = vma_base.phys_base;
const bool is_exec = vma_base.is_exec; const bool is_exec = vma_base.is_exec;
const auto start_in_vma = virtual_addr - vma_base_addr; const auto start_in_vma = virtual_addr - vma_base_addr;
const auto type = vma_base.type;
const bool has_backing = type == VMAType::Direct || type == VMAType::File; const bool has_backing = type == VMAType::Direct || type == VMAType::File;
if (type == VMAType::Direct) { if (type == VMAType::Direct || type == VMAType::Pooled) {
rasterizer->UnmapMemory(virtual_addr, size); rasterizer->UnmapMemory(virtual_addr, size);
} }
if (type == VMAType::Flexible) { if (type == VMAType::Flexible) {
@ -421,10 +427,12 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) {
MergeAdjacent(vma_map, new_it); MergeAdjacent(vma_map, new_it);
bool readonly_file = vma.prot == MemoryProt::CpuRead && type == VMAType::File; bool readonly_file = vma.prot == MemoryProt::CpuRead && type == VMAType::File;
// Unmap the memory region. if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec, // Unmap the memory region.
has_backing, readonly_file); impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base,
TRACK_FREE(virtual_addr, "VMEM"); is_exec, has_backing, readonly_file);
TRACK_FREE(virtual_addr, "VMEM");
}
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -217,41 +217,15 @@ void Emulator::Run(const std::filesystem::path& file) {
linker->LoadModule(eboot_path); linker->LoadModule(eboot_path);
// check if we have system modules to load // check if we have system modules to load
LoadSystemModules(eboot_path, game_info.game_serial); LoadSystemModules(game_info.game_serial);
// Load all prx from game's sce_module folder // Load all prx from game's sce_module folder
std::vector<std::filesystem::path> modules_to_load; mnt->IterateDirectory("/app0/sce_module", [this](const auto& path, const auto is_file) {
std::filesystem::path game_module_folder = file.parent_path() / "sce_module"; if (is_file) {
if (std::filesystem::is_directory(game_module_folder)) { LOG_INFO(Loader, "Loading {}", fmt::UTF(path.u8string()));
for (const auto& entry : std::filesystem::directory_iterator(game_module_folder)) { linker->LoadModule(path);
if (entry.is_regular_file()) {
modules_to_load.push_back(entry.path());
}
} }
} });
// Load all prx from separate update's sce_module folder
std::filesystem::path game_patch_folder = game_folder;
game_patch_folder += "-UPDATE";
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
if (std::filesystem::is_directory(update_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
auto it = std::find_if(modules_to_load.begin(), modules_to_load.end(),
[&entry](const std::filesystem::path& p) {
return p.filename() == entry.path().filename();
});
if (it != modules_to_load.end()) {
*it = entry.path();
} else {
modules_to_load.push_back(entry.path());
}
}
}
for (const auto& module_path : modules_to_load) {
LOG_INFO(Loader, "Loading {}", fmt::UTF(module_path.u8string()));
linker->LoadModule(module_path);
}
#ifdef ENABLE_DISCORD_RPC #ifdef ENABLE_DISCORD_RPC
// Discord RPC // Discord RPC
@ -278,7 +252,7 @@ void Emulator::Run(const std::filesystem::path& file) {
std::exit(0); std::exit(0);
} }
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { void Emulator::LoadSystemModules(const std::string& game_serial) {
constexpr std::array<SysModules, 11> ModulesToLoad{ constexpr std::array<SysModules, 11> ModulesToLoad{
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
{"libSceUlt.sprx", nullptr}, {"libSceUlt.sprx", nullptr},

View File

@ -29,7 +29,7 @@ public:
void UpdatePlayTime(const std::string& serial); void UpdatePlayTime(const std::string& serial);
private: private:
void LoadSystemModules(const std::filesystem::path& file, std::string game_serial); void LoadSystemModules(const std::string& game_serial);
Core::MemoryManager* memory; Core::MemoryManager* memory;
Input::GameController* controller; Input::GameController* controller;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <SDL3/SDL.h> #include <SDL3/SDL.h>
#include "common/config.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/kernel/time.h" #include "core/libraries/kernel/time.h"
#include "core/libraries/pad/pad.h" #include "core/libraries/pad/pad.h"
@ -189,11 +190,6 @@ void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceler
gz += Kp * ez + Ki * eInt[2]; gz += Kp * ez + Ki * eInt[2];
//// Integrate rate of change of quaternion //// Integrate rate of change of quaternion
// float pa = q2, pb = q3, pc = q4;
// q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
// q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime);
// q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime);
// q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime);
q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime); q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime);
q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime); q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime);
@ -247,18 +243,21 @@ void GameController::TryOpenSDLController() {
int gamepad_count; int gamepad_count;
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr; m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) { if (Config::getIsMotionControlsEnabled()) {
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO); if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate); gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
} else { LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad"); } else {
} LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) { }
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL); if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate); accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
} else { LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad"); } else {
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
}
} }
SDL_free(gamepads); SDL_free(gamepads);
SetLightBarRGB(0, 0, 255); SetLightBarRGB(0, 0, 255);

View File

@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
exit(1); exit(1);
} }
// Set fullscreen mode without saving it to config file // Set fullscreen mode without saving it to config file
Config::setFullscreenMode(is_fullscreen); Config::setIsFullscreen(is_fullscreen);
}}, }},
{"--fullscreen", [&](int& i) { arg_map["-f"](i); }}, {"--fullscreen", [&](int& i) { arg_map["-f"](i); }},
{"--add-game-folder", {"--add-game-folder",

View File

@ -283,7 +283,7 @@ public:
#ifdef Q_OS_WIN #ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, icoPath, exePath)) { if (createShortcutWin(linkPath, ebootPath, icoPath, exePath)) {
#else #else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) { if (createShortcutLinux(linkPath, m_games[itemID].name, ebootPath, iconPath)) {
#endif #endif
QMessageBox::information( QMessageBox::information(
nullptr, tr("Shortcut creation"), nullptr, tr("Shortcut creation"),
@ -301,7 +301,7 @@ public:
#ifdef Q_OS_WIN #ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, iconPath, exePath)) { if (createShortcutWin(linkPath, ebootPath, iconPath, exePath)) {
#else #else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) { if (createShortcutLinux(linkPath, m_games[itemID].name, ebootPath, iconPath)) {
#endif #endif
QMessageBox::information( QMessageBox::information(
nullptr, tr("Shortcut creation"), nullptr, tr("Shortcut creation"),
@ -510,8 +510,8 @@ private:
return SUCCEEDED(hres); return SUCCEEDED(hres);
} }
#else #else
bool createShortcutLinux(const QString& linkPath, const QString& targetPath, bool createShortcutLinux(const QString& linkPath, const std::string& name,
const QString& iconPath) { const QString& targetPath, const QString& iconPath) {
QFile shortcutFile(linkPath); QFile shortcutFile(linkPath);
if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) { if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
QMessageBox::critical(nullptr, "Error", QMessageBox::critical(nullptr, "Error",
@ -522,7 +522,7 @@ private:
QTextStream out(&shortcutFile); QTextStream out(&shortcutFile);
out << "[Desktop Entry]\n"; out << "[Desktop Entry]\n";
out << "Version=1.0\n"; out << "Version=1.0\n";
out << "Name=" << QFileInfo(linkPath).baseName() << "\n"; out << "Name=" << QString::fromStdString(name) << "\n";
out << "Exec=" << QCoreApplication::applicationFilePath() << " \"" << targetPath << "\"\n"; out << "Exec=" << QCoreApplication::applicationFilePath() << " \"" << targetPath << "\"\n";
out << "Icon=" << iconPath << "\n"; out << "Icon=" << iconPath << "\n";
out << "Terminal=false\n"; out << "Terminal=false\n";

View File

@ -97,7 +97,7 @@ int main(int argc, char* argv[]) {
exit(1); exit(1);
} }
// Set fullscreen mode without saving it to config file // Set fullscreen mode without saving it to config file
Config::setFullscreenMode(is_fullscreen); Config::setIsFullscreen(is_fullscreen);
}}, }},
{"--fullscreen", [&](int& i) { arg_map["-f"](i); }}, {"--fullscreen", [&](int& i) { arg_map["-f"](i); }},
{"--add-game-folder", {"--add-game-folder",
@ -190,4 +190,4 @@ int main(int argc, char* argv[]) {
// Show the main window and run the Qt application // Show the main window and run the Qt application
m_main_window->show(); m_main_window->show();
return a.exec(); return a.exec();
} }

View File

@ -47,6 +47,9 @@ PKGViewer::PKGViewer(std::shared_ptr<GameInfoClass> game_info_get, QWidget* pare
connect(treeWidget, &QTreeWidget::customContextMenuRequested, this, connect(treeWidget, &QTreeWidget::customContextMenuRequested, this,
[=, this](const QPoint& pos) { [=, this](const QPoint& pos) {
if (treeWidget->selectedItems().isEmpty()) {
return;
}
m_gui_context_menus.RequestGameMenuPKGViewer(pos, m_full_pkg_list, treeWidget, m_gui_context_menus.RequestGameMenuPKGViewer(pos, m_full_pkg_list, treeWidget,
InstallDragDropPkg); InstallDragDropPkg);
}); });

View File

@ -17,6 +17,7 @@
#ifdef ENABLE_UPDATER #ifdef ENABLE_UPDATER
#include "check_update.h" #include "check_update.h"
#endif #endif
#include <QDesktopServices>
#include <toml.hpp> #include <toml.hpp>
#include "background_music_player.h" #include "background_music_player.h"
#include "common/logging/backend.h" #include "common/logging/backend.h"
@ -203,6 +204,16 @@ SettingsDialog::SettingsDialog(std::span<const QString> physical_devices,
}); });
} }
// DEBUG TAB
{
connect(ui->OpenLogLocationButton, &QPushButton::clicked, this, []() {
QString userPath;
Common::FS::PathToQString(userPath,
Common::FS::GetUserPath(Common::FS::PathType::UserDir));
QDesktopServices::openUrl(QUrl::fromLocalFile(userPath + "/log"));
});
}
// Descriptions // Descriptions
{ {
// General // General
@ -300,6 +311,8 @@ void SettingsDialog::LoadValuesFromConfig() {
ui->discordRPCCheckbox->setChecked( ui->discordRPCCheckbox->setChecked(
toml::find_or<bool>(data, "General", "enableDiscordRPC", true)); toml::find_or<bool>(data, "General", "enableDiscordRPC", true));
ui->fullscreenCheckBox->setChecked(toml::find_or<bool>(data, "General", "Fullscreen", false)); ui->fullscreenCheckBox->setChecked(toml::find_or<bool>(data, "General", "Fullscreen", false));
ui->fullscreenModeComboBox->setCurrentText(QString::fromStdString(
toml::find_or<std::string>(data, "General", "FullscreenMode", "Borderless")));
ui->separateUpdatesCheckBox->setChecked( ui->separateUpdatesCheckBox->setChecked(
toml::find_or<bool>(data, "General", "separateUpdateEnabled", false)); toml::find_or<bool>(data, "General", "separateUpdateEnabled", false));
ui->showSplashCheckBox->setChecked(toml::find_or<bool>(data, "General", "showSplash", false)); ui->showSplashCheckBox->setChecked(toml::find_or<bool>(data, "General", "showSplash", false));
@ -339,6 +352,8 @@ void SettingsDialog::LoadValuesFromConfig() {
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left")); toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior); int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0); ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
ui->motionControlsCheckBox->setChecked(
toml::find_or<bool>(data, "Input", "isMotionControlsEnabled", true));
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty()); ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
ResetInstallFolders(); ResetInstallFolders();
@ -532,7 +547,9 @@ void SettingsDialog::UpdateSettings() {
const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"}; const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"};
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked()); Config::setIsFullscreen(ui->fullscreenCheckBox->isChecked());
Config::setFullscreenMode(ui->fullscreenModeComboBox->currentText().toStdString());
Config::setIsMotionControlsEnabled(ui->motionControlsCheckBox->isChecked());
Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked()); Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked());
Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); Config::setLogType(ui->logTypeComboBox->currentText().toStdString());

View File

@ -12,7 +12,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>970</width> <width>970</width>
<height>670</height> <height>750</height>
</rect> </rect>
</property> </property>
<property name="sizePolicy"> <property name="sizePolicy">
@ -133,6 +133,35 @@
<string>Enable Fullscreen</string> <string>Enable Fullscreen</string>
</property> </property>
</widget> </widget>
</item>
<item>
<widget class="QGroupBox" name="fullscreenModeGroupBox">
<property name="title">
<string>Fullscreen Mode</string>
</property>
<layout class="QVBoxLayout" name="fullscreenModeLayout">
<item>
<widget class="QComboBox" name="fullscreenModeComboBox">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<item>
<property name="text">
<string>Borderless</string>
</property>
</item>
<item>
<property name="text">
<string>True</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item> </item>
<item> <item>
<widget class="QCheckBox" name="separateUpdatesCheckBox"> <widget class="QCheckBox" name="separateUpdatesCheckBox">
@ -536,6 +565,9 @@
<property name="leftMargin"> <property name="leftMargin">
<number>0</number> <number>0</number>
</property> </property>
<property name="bottomMargin">
<number>80</number>
</property>
<item> <item>
<layout class="QHBoxLayout" name="hLayoutTrophy"> <layout class="QHBoxLayout" name="hLayoutTrophy">
<item> <item>
@ -566,6 +598,12 @@
<height>0</height> <height>0</height>
</size> </size>
</property> </property>
<property name="font">
<font>
<pointsize>10</pointsize>
<bold>false</bold>
</font>
</property>
</widget> </widget>
</item> </item>
</layout> </layout>
@ -815,6 +853,13 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="motionControlsCheckBox">
<property name="text">
<string>Enable Motion Controls</string>
</property>
</widget>
</item>
<item> <item>
<widget class="QWidget" name="controllerWidgetSpacer" native="true"> <widget class="QWidget" name="controllerWidgetSpacer" native="true">
<property name="enabled"> <property name="enabled">
@ -1349,6 +1394,13 @@
</item> </item>
</layout> </layout>
</item> </item>
<item>
<widget class="QPushButton" name="OpenLogLocationButton">
<property name="text">
<string>Open Log Location</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -93,7 +93,23 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_
} }
SDL_SetWindowMinimumSize(window, 640, 360); SDL_SetWindowMinimumSize(window, 640, 360);
SDL_SetWindowFullscreen(window, Config::isFullscreenMode());
bool error = false;
const SDL_DisplayID displayIndex = SDL_GetDisplayForWindow(window);
if (displayIndex < 0) {
LOG_ERROR(Frontend, "Error getting display index: {}", SDL_GetError());
error = true;
}
const SDL_DisplayMode* displayMode;
if ((displayMode = SDL_GetCurrentDisplayMode(displayIndex)) == 0) {
LOG_ERROR(Frontend, "Error getting display mode: {}", SDL_GetError());
error = true;
}
if (!error) {
SDL_SetWindowFullscreenMode(window,
Config::getFullscreenMode() == "True" ? displayMode : NULL);
}
SDL_SetWindowFullscreen(window, Config::getIsFullscreen());
SDL_InitSubSystem(SDL_INIT_GAMEPAD); SDL_InitSubSystem(SDL_INIT_GAMEPAD);
controller->TryOpenSDLController(); controller->TryOpenSDLController();

View File

@ -172,20 +172,18 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info); const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
const auto type = sharp.GetBoundType();
const Id zero = ctx.u32_zero_value; const Id zero = ctx.u32_zero_value;
const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }}; const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage}; const bool uses_lod{texture.view_type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
const auto query{[&](Id type) { const auto query{[&](Id type) {
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image); : ctx.OpImageQuerySize(type, image);
}}; }};
switch (type) { switch (texture.view_type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color1DArray:
case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DMsaa: case AmdGpu::ImageType::Color2DMsaa:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
case AmdGpu::ImageType::Color2DArray: case AmdGpu::ImageType::Color2DArray:
@ -257,4 +255,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
} }
/// Emits OpCubeFaceCoordAMD for `cube_coords`, typed as a two-component float
/// vector (ctx.F32[2]).
///
/// Only the native path is implemented: reaching this emitter on a profile
/// without `supports_native_cube_calc` is treated as unreachable.
/// `inst` is not used.
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    const bool has_native_cube_calc = ctx.profile.supports_native_cube_calc;
    if (!has_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    const Id face_coord_type = ctx.F32[2];
    return ctx.OpCubeFaceCoordAMD(face_coord_type, cube_coords);
}
/// Emits OpCubeFaceIndexAMD for `cube_coords`, typed as a scalar float
/// (ctx.F32[1]).
///
/// Only the native path is implemented: reaching this emitter on a profile
/// without `supports_native_cube_calc` is treated as unreachable.
/// `inst` is not used.
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    const bool has_native_cube_calc = ctx.profile.supports_native_cube_calc;
    if (!has_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    const Id face_index_type = ctx.F32[1];
    return ctx.OpCubeFaceIndexAMD(face_index_type, cube_coords);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitLaneId(EmitContext& ctx); Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);

View File

@ -773,8 +773,8 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto image = desc.GetSharp(ctx.info); const auto image = desc.GetSharp(ctx.info);
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown; const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
const auto type = image.GetBoundType(); const auto type = image.GetViewType(desc.is_array);
const u32 sampled = desc.IsStorage(image) ? 2 : 1; const u32 sampled = desc.is_written ? 2 : 1;
switch (type) { switch (type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
case AmdGpu::ImageType::Color3D: case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
format);
default: default:
break; break;
} }
@ -802,7 +799,7 @@ void EmitContext::DefineImagesAndSamplers() {
const auto sharp = image_desc.GetSharp(info); const auto sharp = image_desc.GetSharp(info);
const auto nfmt = sharp.GetNumberFmt(); const auto nfmt = sharp.GetNumberFmt();
const bool is_integer = AmdGpu::IsInteger(nfmt); const bool is_integer = AmdGpu::IsInteger(nfmt);
const bool is_storage = image_desc.IsStorage(sharp); const bool is_storage = image_desc.is_written;
const VectorIds& data_types = GetAttributeType(*this, nfmt); const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1]; const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)}; const Id image_type{ImageType(*this, image_desc, sampled_type)};
@ -817,6 +814,7 @@ void EmitContext::DefineImagesAndSamplers() {
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type), .sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type, .pointer_type = pointer_type,
.image_type = image_type, .image_type = image_type,
.view_type = sharp.GetViewType(image_desc.is_array),
.is_integer = is_integer, .is_integer = is_integer,
.is_storage = is_storage, .is_storage = is_storage,
}); });

View File

@ -222,6 +222,7 @@ public:
Id sampled_type; Id sampled_type;
Id pointer_type; Id pointer_type;
Id image_type; Id image_type;
AmdGpu::ImageType view_type;
bool is_integer = false; bool is_integer = false;
bool is_storage = false; bool is_storage = false;
}; };

View File

@ -47,13 +47,26 @@ static IR::Condition MakeCondition(const GcnInst& inst) {
} }
} }
static bool IgnoresExecMask(Opcode opcode) { static bool IgnoresExecMask(const GcnInst& inst) {
switch (opcode) { // EXEC mask does not affect scalar instructions or branches.
case Opcode::V_WRITELANE_B32: switch (inst.category) {
case InstCategory::ScalarALU:
case InstCategory::ScalarMemory:
case InstCategory::FlowControl:
return true; return true;
default: default:
return false; break;
} }
// Read/Write Lane instructions are not affected either.
switch (inst.opcode) {
case Opcode::V_READLANE_B32:
case Opcode::V_WRITELANE_B32:
case Opcode::V_READFIRSTLANE_B32:
return true;
default:
break;
}
return false;
} }
static constexpr size_t LabelReserveSize = 32; static constexpr size_t LabelReserveSize = 32;
@ -147,8 +160,7 @@ void CFG::EmitDivergenceLabels() {
// If all instructions in the scope ignore exec masking, we shouldn't insert a // If all instructions in the scope ignore exec masking, we shouldn't insert a
// scope. // scope.
const auto start = inst_list.begin() + curr_begin + 1; const auto start = inst_list.begin() + curr_begin + 1;
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask, if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
&GcnInst::opcode)) {
// Add a label to the instruction right after the open scope call. // Add a label to the instruction right after the open scope call.
// It is the start of a new basic block. // It is the start of a new basic block.
const auto& save_inst = inst_list[curr_begin]; const auto& save_inst = inst_list[curr_begin];

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
return; return;
} }
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; const auto col_buf = runtime_info.fs_info.color_buffers[index];
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
const auto [r, g, b, a] = col_buf.swizzle;
const std::array swizzle_array = {r, g, b, a}; const std::array swizzle_array = {r, g, b, a};
const auto swizzled_comp = swizzle_array[comp]; const auto swizzled_comp = swizzle_array[comp];
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
ir.SetAttribute(attrib, value, comp); ir.SetAttribute(attrib, converted, comp);
return; return;
} }
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
}; };
const auto unpack = [&](u32 idx) { const auto unpack = [&](u32 idx) {

View File

@ -301,6 +301,9 @@ private:
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
void LogMissingOpcode(const GcnInst& inst); void LogMissingOpcode(const GcnInst& inst);
private: private:

View File

@ -3,6 +3,7 @@
#include "shader_recompiler/frontend/opcodes.h" #include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -904,7 +905,7 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
case ConditionOp::GE: case ConditionOp::GE:
return ir.FPGreaterThanEqual(src0, src1); return ir.FPGreaterThanEqual(src0, src1);
case ConditionOp::U: case ConditionOp::U:
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1))); return ir.LogicalOr(ir.FPIsNan(src0), ir.FPIsNan(src1));
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -1042,20 +1043,92 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
V_MAD_I32_I24(inst, false); V_MAD_I32_I24(inst, false);
} }
// Chooses one of three per-axis results based on which component of (x, y, z) has the
// largest magnitude: z_res when |z| >= |x| and |z| >= |y|, otherwise y_res when
// |y| >= |x|, otherwise x_res.
IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                                     const IR::F32& x_res, const IR::F32& y_res,
                                     const IR::F32& z_res) {
    const auto mag_x = ir.FPAbs(x);
    const auto mag_y = ir.FPAbs(y);
    const auto mag_z = ir.FPAbs(z);

    const auto z_ge_x = ir.FPGreaterThanEqual(mag_z, mag_x);
    const auto z_ge_y = ir.FPGreaterThanEqual(mag_z, mag_y);
    const auto z_is_major = ir.LogicalAnd(z_ge_x, z_ge_y);
    const auto y_is_major = ir.FPGreaterThanEqual(mag_y, mag_x);

    // Resolve the X/Y choice first, then let the Z test override it.
    const auto xy_choice = ir.Select(y_is_major, y_res, x_res);
    return IR::F32{ir.Select(z_is_major, z_res, xy_choice)};
}
void Translator::V_CUBEID_F32(const GcnInst& inst) { void Translator::V_CUBEID_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2])); const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
}
SetDst(inst.dst[0], result);
} }
void Translator::V_CUBESC_F32(const GcnInst& inst) { void Translator::V_CUBESC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0])); const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 0)};
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
}
SetDst(inst.dst[0], result);
} }
void Translator::V_CUBETC_F32(const GcnInst& inst) { void Translator::V_CUBETC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1])); const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 1)};
} else {
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const IR::F32 x_z_sc{ir.FPNeg(y)};
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
}
SetDst(inst.dst[0], result);
} }
void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_CUBEMA_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.Imm32(1.f)); const auto x = GetSrc<IR::F32>(inst.src[0]);
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
const auto two{ir.Imm32(4.f)};
const IR::F32 x_major_axis{ir.FPMul(x, two)};
const IR::F32 y_major_axis{ir.FPMul(y, two)};
const IR::F32 z_major_axis{ir.FPMul(z, two)};
const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
SetDst(inst.dst[0], result);
} }
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {

View File

@ -418,6 +418,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{}; IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip); info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info); const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info);
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
@ -442,6 +443,7 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{}; IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip); info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
boost::container::static_vector<IR::F32, 4> comps; boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
@ -456,13 +458,18 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
} }
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code}; IR::VectorReg dst_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const auto flags = ImageResFlags(inst.control.mimg.dmask); const auto flags = ImageResFlags(inst.control.mimg.dmask);
const bool has_mips = flags.test(ImageResComponent::MipCount); const bool has_mips = flags.test(ImageResComponent::MipCount);
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code)); const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
const IR::Value tsharp = ir.GetScalarReg(tsharp_reg); const IR::Value tsharp = ir.GetScalarReg(tsharp_reg);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips));
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info);
if (flags.test(ImageResComponent::Width)) { if (flags.test(ImageResComponent::Width)) {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)});
@ -484,6 +491,9 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value value = ir.GetVectorReg(val_reg); const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body = const IR::Value body =
@ -494,25 +504,25 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
case AtomicOp::Swap: case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {}); return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add: case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {}); return ir.ImageAtomicIAdd(handle, body, value, info);
case AtomicOp::Smin: case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {}); return ir.ImageAtomicIMin(handle, body, value, true, info);
case AtomicOp::Umin: case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {}); return ir.ImageAtomicUMin(handle, body, value, info);
case AtomicOp::Smax: case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {}); return ir.ImageAtomicIMax(handle, body, value, true, info);
case AtomicOp::Umax: case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {}); return ir.ImageAtomicUMax(handle, body, value, info);
case AtomicOp::And: case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {}); return ir.ImageAtomicAnd(handle, body, value, info);
case AtomicOp::Or: case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {}); return ir.ImageAtomicOr(handle, body, value, info);
case AtomicOp::Xor: case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {}); return ir.ImageAtomicXor(handle, body, value, info);
case AtomicOp::Inc: case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {}); return ir.ImageAtomicInc(handle, body, value, info);
case AtomicOp::Dec: case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {}); return ir.ImageAtomicDec(handle, body, value, info);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -643,11 +653,14 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body = ir.CompositeConstruct( const IR::Value body = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1), ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3)); ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
const IR::Value lod = ir.ImageQueryLod(handle, body, {}); const IR::Value lod = ir.ImageQueryLod(handle, body, info);
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)}); ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)}); ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
} }

View File

@ -70,14 +70,8 @@ struct ImageResource {
bool is_depth{}; bool is_depth{};
bool is_atomic{}; bool is_atomic{};
bool is_array{}; bool is_array{};
bool is_read{};
bool is_written{}; bool is_written{};
[[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept {
// Need cube as storage when used with ImageRead.
return is_written || (is_read && image.GetBoundType() == AmdGpu::ImageType::Cube);
}
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
}; };
using ImageResourceList = boost::container::small_vector<ImageResource, 16>; using ImageResourceList = boost::container::small_vector<ImageResource, 16>;

View File

@ -1732,11 +1732,6 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
} }
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips) {
return Inst(Opcode::ImageQueryDimensions, handle, lod, skip_mips);
}
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips, TextureInstInfo info) { const IR::U1& skip_mips, TextureInstInfo info) {
return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips);
@ -1763,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color); Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
} }
// Emits a CubeFaceCoord instruction taking the cube direction vector `cube_coords`.
// The opcode table declares it as F32x3 -> F32x2 and marks it as optional, usable
// only when profile.supports_native_cube_calc is set.
[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
return Inst(Opcode::CubeFaceCoord, cube_coords);
}
// Emits a CubeFaceIndex instruction taking the cube direction vector `cube_coords`.
// The opcode table declares it as F32x3 -> F32 and marks it as optional, usable
// only when profile.supports_native_cube_calc is set.
[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
return Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
}
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
// Renderdoc will hook in its own implementation of the SPIRV instruction // Renderdoc will hook in its own implementation of the SPIRV instruction
// Renderdoc accepts format specifiers, e.g. %u, listed here: // Renderdoc accepts format specifiers, e.g. %u, listed here:

View File

@ -324,8 +324,6 @@ public:
const F32& dref, const F32& lod, const F32& dref, const F32& lod,
const Value& offset, TextureInstInfo info); const Value& offset, TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
const U1& skip_mips, TextureInstInfo info); const U1& skip_mips, TextureInstInfo info);
@ -344,6 +342,9 @@ public:
void ImageWrite(const Value& handle, const Value& coords, const U32& lod, void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
const U32& multisampling, const Value& color, TextureInstInfo info); const U32& multisampling, const Value& color, TextureInstInfo info);
[[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
[[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
void EmitVertex(); void EmitVertex();
void EmitPrimitive(); void EmitPrimitive();

View File

@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Cube operations - optional, usable if profile.supports_native_cube_calc
OPCODE(CubeFaceCoord, F32x2, F32x3, )
OPCODE(CubeFaceIndex, F32, F32x3, )
// Warp operations // Warp operations
OPCODE(LaneId, U32, ) OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, ) OPCODE(WarpId, U32, )

View File

@ -161,10 +161,9 @@ public:
// Registers an image resource and returns its index in image_resources.
// Two descriptors are treated as the same resource only when both the sharp index
// and the array-ness match; the written flag is accumulated across all users.
u32 Add(const ImageResource& desc) {
    const auto is_same_view = [&desc](const auto& existing) {
        return existing.sharp_idx == desc.sharp_idx && existing.is_array == desc.is_array;
    };
    const u32 index{Add(image_resources, desc, is_same_view)};
    image_resources[index].is_written |= desc.is_written;
    return index;
}
@ -301,8 +300,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
}); });
} }
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
Descriptors& descriptors) {
s32 binding{}; s32 binding{};
AmdGpu::Buffer buffer; AmdGpu::Buffer buffer;
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
@ -317,19 +315,189 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}); });
} }
// Update buffer descriptor format.
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
// Replace handle with binding index in buffer resource list. // Replace handle with binding index in buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding)); inst.SetArg(0, ir.Imm32(binding));
}
// Registers the texture buffer referenced by `inst` in `descriptors` and rewrites
// argument 0 of the instruction with the resulting binding index.
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
                             Descriptors& descriptors) {
    // Follow the handle back to the instruction that produces the sharp.
    const IR::Inst* const handle_inst = inst.Arg(0).InstRecursive();
    const IR::Inst* const sharp_producer = handle_inst->Arg(0).InstRecursive();
    const auto sharp_index = TrackSharp(sharp_producer, info);

    // Only format stores mark the buffer as written.
    const bool is_store = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
    const s32 binding = descriptors.Add(TextureBufferResource{
        .sharp_idx = sharp_index,
        .is_written = is_store,
    });

    // Replace handle with binding index in texture buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(binding));
}
// Resolves the image sharp used by an image instruction, registers it in `descriptors`
// and rewrites argument 0 of `inst` with the binding index. For ImageSampleRaw the
// sampler is registered as well and its binding is packed into the upper 16 bits.
// Instructions that target an FMask image are replaced with constant values instead.
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
// Search predicate: stop at the first instruction that can be the source of the sharp.
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* producer = result.value();
// A U32x2 composite carries the image handle in element 0 and the sampler in element 1.
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
// Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
// An invalid sharp is logged and replaced with the null image so compilation can continue.
if (!image.Valid()) {
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
image = AmdGpu::Image::Null();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
// Only ImageWrite marks the resource as written.
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
// Patch image instruction if image is FMask.
if (image.IsFmask()) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
switch (inst.GetOpcode()) {
case IR::Opcode::ImageRead:
case IR::Opcode::ImageSampleRaw: {
// Reads and samples are replaced with two fixed bit patterns.
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
return;
}
case IR::Opcode::ImageQueryLod:
inst.ReplaceUsesWith(ir.Imm32(1));
return;
case IR::Opcode::ImageQueryDimensions: {
// NOTE(review): both x and y are taken from image.width; confirm whether y was
// meant to use the image height.
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
ir.Imm32(static_cast<u32>(image.width)), // y
ir.Imm32(1), ir.Imm32(1)); // depth, mip
inst.ReplaceUsesWith(dims);
// Track FMask resource to do specialization.
descriptors.Add(FMaskResource{
.sharp_idx = tsharp,
});
return;
}
default:
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
}
}
// Regular image: register it with the flags carried by the instruction.
u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_written = is_written,
});
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
// Read sampler sharp.
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
// Patch image and sampler handle.
// Low 16 bits hold the image binding, the sampler binding is shifted into the bits above.
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
} else {
// Patch image handle.
inst.SetArg(0, ir.Imm32(image_binding));
}
}
// Binds the GDS buffer for a data ring access and rewrites the instruction so that
// argument 0 holds the counter's dword index and argument 1 the buffer binding.
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
    // Insert gds binding in the shader if it doesn't exist already.
    // The buffer is used for append/consume counters.
    constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
    const u32 gds_binding = descriptors.Add(BufferResource{
        .used_types = IR::Type::U32,
        .inline_cbuf = GdsSharp,
        .is_gds_buffer = true,
        .is_written = true,
    });

    // Attempt to deduce the GDS address of counter at compile time.
    const IR::Value& offset_arg = inst.Arg(0);
    u32 gds_addr{};
    if (offset_arg.IsImmediate()) {
        // Nothing to do, offset is known.
        gds_addr = offset_arg.U32() & 0xFFFF;
    } else {
        const auto is_user_data = [](const IR::Inst* candidate) -> std::optional<const IR::Inst*> {
            if (candidate->GetOpcode() != IR::Opcode::GetUserData) {
                return std::nullopt;
            }
            return candidate;
        };
        const auto ud_source = IR::BreadthFirstSearch(&inst, is_user_data);
        ASSERT_MSG(ud_source, "Unable to track M0 source");

        // M0 must be set by some user data register.
        const IR::Inst* offset_producer = offset_arg.InstRecursive();
        const u32 ud_reg = u32(ud_source.value()->Arg(0).ScalarReg());
        u32 m0_val = info.user_data[ud_reg] >> 16;
        // An add on top of M0 contributes its second operand to the address.
        if (offset_producer->GetOpcode() == IR::Opcode::IAdd32) {
            m0_val += offset_producer->Arg(1).U32();
        }
        gds_addr = m0_val & 0xFFFF;
    }

    // Patch instruction.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(gds_addr >> 2));
    inst.SetArg(1, ir.Imm32(gds_binding));
}
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto handle = inst.Arg(0);
const auto buffer_res = info.buffers[handle.U32()];
const auto buffer = buffer_res.GetSharp(info);
ASSERT(!buffer.add_tid_enable); ASSERT(!buffer.add_tid_enable);
// Address of constant buffer reads can be calculated at IR emittion time. // Address of constant buffer reads can be calculated at IR emission time.
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
return; return;
} }
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const IR::U32 index_stride = ir.Imm32(buffer.index_stride); const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
const IR::U32 element_size = ir.Imm32(buffer.element_size); const IR::U32 element_size = ir.Imm32(buffer.element_size);
@ -366,82 +534,38 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
inst.SetArg(1, address); inst.SetArg(1, address);
} }
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
Descriptors& descriptors) { const auto handle = inst.Arg(0);
const IR::Inst* handle = inst.Arg(0).InstRecursive(); const auto buffer_res = info.texture_buffers[handle.U32()];
const IR::Inst* producer = handle->Arg(0).InstRecursive(); const auto buffer = buffer_res.GetSharp(info);
const auto sharp = TrackSharp(producer, info);
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
const s32 binding = descriptors.Add(TextureBufferResource{
.sharp_idx = sharp,
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
});
// Replace handle with binding index in texture buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
}
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
const IR::Value& z, bool is_written, bool is_array) {
// When cubemap is written with imageStore it is treated like 2DArray.
if (is_written) {
return ir.CompositeConstruct(s, t, z);
}
ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
// We need to fix x and y coordinate,
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
// We already force the scale value to be 1.0 when handling v_cubema_f32,
// here we subtract 1.5 to recover the original value.
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
if (is_array) {
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
ir.ConvertIToF(32, 32, false, slice_id));
} else {
return ir.CompositeConstruct(x, y, z);
}
}
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors, const IR::Inst* producer,
const u32 image_binding, const AmdGpu::Image& image) {
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
const auto converted =
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
inst.SetArg(2, converted);
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
const auto converted =
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
inst.ReplaceUsesWith(converted);
}
}
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
const ImageResource& image_res, const AmdGpu::Image& image) {
const auto handle = inst.Arg(0);
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
auto sampler = sampler_res.GetSharp(info);
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16); const auto view_type = image.GetViewType(image_res.is_array);
IR::Inst* body1 = inst.Arg(1).InstRecursive(); IR::Inst* body1 = inst.Arg(1).InstRecursive();
IR::Inst* body2 = inst.Arg(2).InstRecursive(); IR::Inst* body2 = inst.Arg(2).InstRecursive();
@ -488,7 +612,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(off), ir.Imm32(6), true); return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(off), ir.Imm32(6), true);
}; };
switch (image.GetType()) { switch (view_type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color1DArray:
return read(0); return read(0);
@ -497,7 +621,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
case AmdGpu::ImageType::Color2DMsaa: case AmdGpu::ImageType::Color2DMsaa:
return ir.CompositeConstruct(read(0), read(8)); return ir.CompositeConstruct(read(0), read(8));
case AmdGpu::ImageType::Color3D: case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
return ir.CompositeConstruct(read(0), read(8), read(16)); return ir.CompositeConstruct(read(0), read(8), read(16));
default: default:
UNREACHABLE(); UNREACHABLE();
@ -509,7 +632,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
if (!inst_info.has_derivatives) { if (!inst_info.has_derivatives) {
return {}; return {};
} }
switch (image.GetType()) { switch (view_type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color1DArray:
// du/dx, du/dy // du/dx, du/dy
@ -523,7 +646,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)), return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))}; ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
case AmdGpu::ImageType::Color3D: case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
// (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy) // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
addr_reg = addr_reg + 6; addr_reg = addr_reg + 6;
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5), return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
@ -539,7 +661,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
// Query dimensions of image if needed for normalization. // Query dimensions of image if needed for normalization.
// We can't use the image sharp because it could be bound to a different image later. // We can't use the image sharp because it could be bound to a different image later.
const auto dimensions = const auto dimensions =
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info)
: IR::Value{}; : IR::Value{};
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
const auto coord = get_addr_reg(coord_idx); const auto coord = get_addr_reg(coord_idx);
@ -554,7 +676,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
const IR::Value coords = [&] -> IR::Value { const IR::Value coords = [&] -> IR::Value {
switch (image.GetType()) { switch (view_type) {
case AmdGpu::ImageType::Color1D: // x case AmdGpu::ImageType::Color1D: // x
addr_reg = addr_reg + 1; addr_reg = addr_reg + 1;
return get_coord(addr_reg - 1, 0); return get_coord(addr_reg - 1, 0);
@ -573,10 +695,6 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
addr_reg = addr_reg + 3; addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_coord(addr_reg - 1, 2)); get_coord(addr_reg - 1, 2));
case AmdGpu::ImageType::Cube: // x, y, face
addr_reg = addr_reg + 3;
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -589,7 +707,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
: IR::F32{}; : IR::F32{};
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{}; const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
auto new_inst = [&] -> IR::Value { auto texel = [&] -> IR::Value {
if (inst_info.is_gather) { if (inst_info.is_gather) {
if (inst_info.is_depth) { if (inst_info.is_depth) {
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info); return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
@ -611,98 +729,35 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
} }
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
}(); }();
inst.ReplaceUsesWithAndRemove(new_inst);
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
inst.ReplaceUsesWith(converted);
} }
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> { // Nothing to patch for dimension query.
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* producer = result.value();
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
// Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
if (!image.Valid()) {
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
image = AmdGpu::Image::Null();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
// Patch image instruction if image is FMask.
if (image.IsFmask()) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
switch (inst.GetOpcode()) {
case IR::Opcode::ImageRead:
case IR::Opcode::ImageSampleRaw: {
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
return;
}
case IR::Opcode::ImageQueryLod:
inst.ReplaceUsesWith(ir.Imm32(1));
return;
case IR::Opcode::ImageQueryDimensions: {
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
ir.Imm32(static_cast<u32>(image.width)), // y
ir.Imm32(1), ir.Imm32(1)); // depth, mip
inst.ReplaceUsesWith(dims);
// Track FMask resource to do specialization.
descriptors.Add(FMaskResource{
.sharp_idx = tsharp,
});
return;
}
default:
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
}
}
u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_read = is_read,
.is_written = is_written,
});
// Sample instructions must be resolved into a new instruction using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
return;
}
// Patch image handle
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(image_binding));
// No need to patch coordinates if we are just querying.
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) { if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
return; return;
} }
const auto handle = inst.Arg(0);
const auto image_res = info.images[handle.U32() & 0xFFFF];
auto image = image_res.GetSharp(info);
// Sample instructions must be handled separately using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
PatchImageSampleArgs(block, inst, info, image_res, image);
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const auto view_type = image.GetViewType(image_res.is_array);
// Now that we know the image type, adjust texture coordinate vector. // Now that we know the image type, adjust texture coordinate vector.
IR::Inst* body = inst.Arg(1).InstRecursive(); IR::Inst* body = inst.Arg(1).InstRecursive();
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> { const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
switch (image.GetType()) { switch (view_type) {
case AmdGpu::ImageType::Color1D: // x, [lod] case AmdGpu::ImageType::Color1D: // x, [lod]
return {body->Arg(0), body->Arg(1)}; return {body->Arg(0), body->Arg(1)};
case AmdGpu::ImageType::Color1DArray: // x, slice, [lod] case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
@ -718,153 +773,74 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
[[fallthrough]]; [[fallthrough]];
case AmdGpu::ImageType::Color3D: // x, y, z, [lod] case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written,
inst_info.is_array),
body->Arg(3)};
default: default:
UNREACHABLE_MSG("Unknown image type {}", image.GetType()); UNREACHABLE_MSG("Unknown image type {}", view_type);
} }
}(); }();
inst.SetArg(1, coords);
if (inst_info.has_lod) { const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa ||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || view_type == AmdGpu::ImageType::Color2DMsaaArray;
inst.GetOpcode() == IR::Opcode::ImageWrite); ASSERT(!inst_info.has_lod || !has_ms);
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
inst.SetArg(2, arg);
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
(inst.GetOpcode() == IR::Opcode::ImageRead ||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
inst.SetArg(3, arg);
}
}
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { const auto is_storage = image_res.is_written;
const auto binding = inst.Arg(0).U32(); if (inst.GetOpcode() == IR::Opcode::ImageRead) {
const auto buffer_res = info.texture_buffers[binding]; auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
const auto buffer = buffer_res.GetSharp(info); if (is_storage) {
if (!buffer.Valid()) { // Storage image requires shader swizzle.
// Don't need to swizzle invalid buffer. texel = ApplySwizzle(ir, texel, image.DstSelect());
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect()));
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
inst.ReplaceUsesWith(swizzled);
}
}
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
const auto binding = inst.Arg(0).U32();
const auto image_res = info.images[binding & 0xFFFF];
const auto image = image_res.GetSharp(info);
if (!image.Valid() || !image_res.IsStorage(image)) {
// Don't need to swizzle invalid or non-storage image.
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect()));
} else if (inst.GetOpcode() == IR::Opcode::ImageRead) {
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const auto lod = inst.Arg(2);
const auto ms = inst.Arg(3);
const auto texel =
ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod},
ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info);
const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect());
inst.ReplaceUsesWith(swizzled);
}
}
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
// Insert gds binding in the shader if it doesn't exist already.
// The buffer is used for append/consume counters.
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = GdsSharp,
.is_gds_buffer = true,
.is_written = true,
});
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
} }
return std::nullopt; const auto converted =
}; ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
inst.ReplaceUsesWith(converted);
} else {
inst.SetArg(1, coords);
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
inst.SetArg(2, lod);
inst.SetArg(3, ms);
// Attempt to deduce the GDS address of counter at compile time. auto texel = inst.Arg(4);
const u32 gds_addr = [&] { if (is_storage) {
const IR::Value& gds_offset = inst.Arg(0); // Storage image requires shader swizzle.
if (gds_offset.IsImmediate()) { texel = ApplySwizzle(ir, texel, image.DstSelect());
// Nothing to do, offset is known. }
return gds_offset.U32() & 0xFFFF; const auto converted =
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
inst.SetArg(4, converted);
} }
const auto result = IR::BreadthFirstSearch(&inst, pred); }
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
return m0_val & 0xFFFF;
}();
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
} }
void ResourceTrackingPass(IR::Program& program) { void ResourceTrackingPass(IR::Program& program) {
// Iterate resource instructions and patch them after finding the sharp. // Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info; auto& info = program.info;
// Pass 1: Track resource sharps
Descriptors descriptors{info}; Descriptors descriptors{info};
for (IR::Block* const block : program.blocks) { for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (IsBufferInstruction(inst)) { if (IsBufferInstruction(inst)) {
PatchBufferInstruction(*block, inst, info, descriptors); PatchBufferSharp(*block, inst, info, descriptors);
continue; } else if (IsTextureBufferInstruction(inst)) {
} PatchTextureBufferSharp(*block, inst, info, descriptors);
if (IsTextureBufferInstruction(inst)) { } else if (IsImageInstruction(inst)) {
PatchTextureBufferInstruction(*block, inst, info, descriptors); PatchImageSharp(*block, inst, info, descriptors);
continue; } else if (IsDataRingInstruction(inst)) {
} PatchDataRingAccess(*block, inst, info, descriptors);
if (IsImageInstruction(inst)) {
PatchImageInstruction(*block, inst, info, descriptors);
continue;
}
if (IsDataRingInstruction(inst)) {
PatchDataRingInstruction(*block, inst, info, descriptors);
} }
} }
} }
// Second pass to reinterpret format read/write where needed, since we now know
// the bindings and their properties. // Pass 2: Patch instruction args
for (IR::Block* const block : program.blocks) { for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (IsTextureBufferInstruction(inst)) { if (IsBufferInstruction(inst)) {
PatchTextureBufferInterpretation(*block, inst, info); PatchBufferArgs(*block, inst, info);
continue; } else if (IsTextureBufferInstruction(inst)) {
} PatchTextureBufferArgs(*block, inst, info);
if (IsImageInstruction(inst)) { } else if (IsImageInstruction(inst)) {
PatchImageInterpretation(*block, inst, info); PatchImageArgs(*block, inst, info);
} }
} }
} }

View File

@ -5,7 +5,7 @@
namespace Shader::Optimization { namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) { void Visit(Info& info, const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute: case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: case IR::Opcode::GetAttributeU32:

View File

@ -4,7 +4,7 @@
#pragma once #pragma once
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h"
namespace Shader::IR { namespace Shader::IR {
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
return swizzled; return swizzled;
} }
/// Converts one component that was read from a resource, as dictated by `conversion`.
///
/// @param ir         Emitter used to append the conversion instructions.
/// @param value      Component as loaded, typed as F32 in the IR.
/// @param conversion Which conversion to perform.
/// @return `value` untouched for `None`; otherwise the converted component.
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
                                     const AmdGpu::NumberConversion& conversion) {
    using Conversion = AmdGpu::NumberConversion;
    if (conversion == Conversion::None) {
        return value;
    }
    if (conversion == Conversion::UintToUscaled) {
        // Reinterpret the bits as U32, then do an unsigned integer -> float conversion.
        const auto raw_bits = ir.BitCast<U32>(value);
        return ir.ConvertUToF(32, 32, raw_bits);
    }
    if (conversion == Conversion::SintToSscaled) {
        // Same reinterpretation, but the integer -> float conversion is signed.
        const auto raw_bits = ir.BitCast<U32>(value);
        return ir.ConvertSToF(32, 32, raw_bits);
    }
    if (conversion == Conversion::UnormToUbnorm) {
        // Convert 0...1 to -1...1
        const auto doubled = ir.FPMul(value, ir.Imm32(2.f));
        return ir.FPSub(doubled, ir.Imm32(1.f));
    }
    UNREACHABLE();
}
/// Applies ApplyReadNumberConversion to each of the four components of `value`.
///
/// @param ir         Emitter used to append the extract/convert/construct instructions.
/// @param value      Four-component composite to convert.
/// @param conversion Which conversion to perform on every component.
/// @return `value` itself when no conversion is requested, otherwise a new composite.
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
                                           const AmdGpu::NumberConversion& conversion) {
    // Nothing to convert: hand the composite back without emitting any instructions.
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }
    const auto convert_lane = [&](const int lane) -> F32 {
        return ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, lane)}, conversion);
    };
    // Lanes are evaluated as separate statements so instructions are emitted in x, y, z, w
    // order; passing the calls directly as arguments would leave that order unspecified.
    const F32 lane_x = convert_lane(0);
    const F32 lane_y = convert_lane(1);
    const F32 lane_z = convert_lane(2);
    const F32 lane_w = convert_lane(3);
    return ir.CompositeConstruct(lane_x, lane_y, lane_z, lane_w);
}
/// Converts one component that is about to be written to a resource, as dictated by
/// `conversion`.
///
/// @param ir         Emitter used to append the conversion instructions.
/// @param value      Component produced by the shader, typed as F32 in the IR.
/// @param conversion Which conversion to perform.
/// @return `value` untouched for `None`; otherwise the converted component, always typed F32.
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
                                      const AmdGpu::NumberConversion& conversion) {
    using Conversion = AmdGpu::NumberConversion;
    if (conversion == Conversion::None) {
        return value;
    }
    if (conversion == Conversion::UintToUscaled) {
        // Need to return float type to maintain IR semantics, so the unsigned integer
        // result is bitcast back to F32.
        const U32 as_uint{ir.ConvertFToU(32, value)};
        return ir.BitCast<F32>(as_uint);
    }
    if (conversion == Conversion::SintToSscaled) {
        // Need to return float type to maintain IR semantics, so the signed integer
        // result is bitcast back to F32.
        const U32 as_sint{ir.ConvertFToS(32, value)};
        return ir.BitCast<F32>(as_sint);
    }
    if (conversion == Conversion::UnormToUbnorm) {
        // Convert -1...1 to 0...1
        const auto shifted = ir.FPAdd(value, ir.Imm32(1.f));
        return ir.FPDiv(shifted, ir.Imm32(2.f));
    }
    UNREACHABLE();
}
/// Applies ApplyWriteNumberConversion to each of the four components of `value`.
///
/// @param ir         Emitter used to append the extract/convert/construct instructions.
/// @param value      Four-component composite to convert.
/// @param conversion Which conversion to perform on every component.
/// @return `value` itself when no conversion is requested, otherwise a new composite.
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
                                            const AmdGpu::NumberConversion& conversion) {
    // Nothing to convert: hand the composite back without emitting any instructions.
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }
    const auto convert_lane = [&](const int lane) -> F32 {
        return ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, lane)}, conversion);
    };
    // Lanes are evaluated as separate statements so instructions are emitted in x, y, z, w
    // order; passing the calls directly as arguments would leave that order unspecified.
    const F32 lane_x = convert_lane(0);
    const F32 lane_y = convert_lane(1);
    const F32 lane_z = convert_lane(2);
    const F32 lane_w = convert_lane(3);
    return ir.CompositeConstruct(lane_x, lane_y, lane_z, lane_w);
}
} // namespace Shader::IR } // namespace Shader::IR

View File

@ -24,6 +24,7 @@ struct Profile {
bool support_explicit_workgroup_layout{}; bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{}; bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{}; bool supports_image_load_store_lod{};
bool supports_native_cube_calc{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};

View File

@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
std::array<PsInput, 32> inputs; std::array<PsInput, 32> inputs;
struct PsColorBuffer { struct PsColorBuffer {
AmdGpu::NumberFormat num_format; AmdGpu::NumberFormat num_format;
AmdGpu::NumberConversion num_conversion;
AmdGpu::CompMapping swizzle; AmdGpu::CompMapping swizzle;
auto operator<=>(const PsColorBuffer&) const noexcept = default; auto operator<=>(const PsColorBuffer&) const noexcept = default;

View File

@ -32,6 +32,7 @@ struct BufferSpecialization {
struct TextureBufferSpecialization { struct TextureBufferSpecialization {
bool is_integer = false; bool is_integer = false;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
auto operator<=>(const TextureBufferSpecialization&) const = default; auto operator<=>(const TextureBufferSpecialization&) const = default;
}; };
@ -41,6 +42,7 @@ struct ImageSpecialization {
bool is_integer = false; bool is_integer = false;
bool is_storage = false; bool is_storage = false;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
auto operator<=>(const ImageSpecialization&) const = default; auto operator<=>(const ImageSpecialization&) const = default;
}; };
@ -107,15 +109,17 @@ struct StageSpecialization {
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect(); spec.dst_select = sharp.DstSelect();
spec.num_conversion = sharp.GetNumberConversion();
}); });
ForEachSharp(binding, images, info->images, ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetBoundType(); spec.type = sharp.GetViewType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.IsStorage(sharp); spec.is_storage = desc.is_written;
if (spec.is_storage) { if (spec.is_storage) {
spec.dst_select = sharp.DstSelect(); spec.dst_select = sharp.DstSelect();
} }
spec.num_conversion = sharp.GetNumberConversion();
}); });
ForEachSharp(binding, fmasks, info->fmasks, ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {

View File

@ -20,9 +20,9 @@
#include "common/types.h" #include "common/types.h"
#include "common/unique_function.h" #include "common/unique_function.h"
#include "shader_recompiler/params.h" #include "shader_recompiler/params.h"
#include "types.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
namespace Vulkan { namespace Vulkan {
class Rasterizer; class Rasterizer;
@ -899,7 +899,12 @@ struct Liverpool {
// There is a small difference between T# and CB number types, account for it. // There is a small difference between T# and CB number types, account for it.
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
? NumberFormat::Srgb ? NumberFormat::Srgb
: info.number_type.Value()); : info.number_type.Value(),
info.format);
}
[[nodiscard]] NumberConversion GetNumberConversion() const {
return MapNumberConversion(info.number_type);
} }
[[nodiscard]] CompMapping Swizzle() const { [[nodiscard]] CompMapping Swizzle() const {
@ -938,7 +943,7 @@ struct Liverpool {
const auto swap_idx = static_cast<u32>(info.comp_swap.Value()); const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
const auto components_idx = NumComponents(info.format) - 1; const auto components_idx = NumComponents(info.format) - 1;
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
return RemapComponents(info.format, mrt_swizzle); return RemapSwizzle(info.format, mrt_swizzle);
} }
}; };

View File

@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
return "Srgb"; return "Srgb";
case NumberFormat::Ubnorm: case NumberFormat::Ubnorm:
return "Ubnorm"; return "Ubnorm";
case NumberFormat::UbnromNz: case NumberFormat::UbnormNz:
return "UbnormNz"; return "UbnormNz";
case NumberFormat::Ubint: case NumberFormat::Ubint:
return "Ubint"; return "Ubint";

View File

@ -11,96 +11,6 @@
namespace AmdGpu { namespace AmdGpu {
/// Selector for one output component of a swizzle: either a constant (Zero/One) or one of
/// the four source channels. Values 2 and 3 are not assigned to any enumerator.
enum class CompSwizzle : u32 {
Zero = 0,
One = 1,
Red = 4,
Green = 5,
Blue = 6,
Alpha = 7,
};
/// Four-way component swizzle. Each 3-bit field says what feeds the corresponding output
/// component: a constant or one of the input channels.
struct CompMapping {
    CompSwizzle r : 3;
    CompSwizzle g : 3;
    CompSwizzle b : 3;
    CompSwizzle a : 3;

    auto operator<=>(const CompMapping& other) const = default;

    /// Produces a swizzled copy of `data`; output slot i is chosen by the i-th selector
    /// (r, g, b, a in that order).
    template <typename T>
    [[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
        return {ApplySingle(data, r), ApplySingle(data, g), ApplySingle(data, b),
                ApplySingle(data, a)};
    }

private:
    /// Resolves one selector against `data`: Zero/One yield T(0)/T(1), the colour selectors
    /// yield data[0..3]. Any other selector value is treated as unreachable.
    template <typename T>
    T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
        if (swizzle == CompSwizzle::Zero) {
            return T(0);
        }
        if (swizzle == CompSwizzle::One) {
            return T(1);
        }
        if (swizzle == CompSwizzle::Red) {
            return data[0];
        }
        if (swizzle == CompSwizzle::Green) {
            return data[1];
        }
        if (swizzle == CompSwizzle::Blue) {
            return data[2];
        }
        if (swizzle == CompSwizzle::Alpha) {
            return data[3];
        }
        UNREACHABLE();
    }
};
/// Substitutes three packed data formats with a counterpart format:
/// 11_11_10 -> 10_11_11, 10_10_10_2 -> 2_10_10_10 and 5_5_5_1 -> 1_5_5_5.
/// Every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
/// Number-format counterpart of RemapDataFormat. Currently an identity mapping: the format
/// is returned unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
return format;
}
/// Adjusts a component mapping for the three packed formats handled here.
///
/// - Format11_11_10: the r and b selectors trade places; g and a are kept.
/// - Format10_10_10_2 / Format5_5_5_1: the selector order is reversed
///   (r <- a, g <- b, b <- g, a <- r).
/// - Any other format: `components` is returned as-is.
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
    if (format == DataFormat::Format11_11_10) {
        return CompMapping{
            .r = components.b,
            .g = components.g,
            .b = components.r,
            .a = components.a,
        };
    }
    if (format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1) {
        return CompMapping{
            .r = components.a,
            .g = components.b,
            .b = components.g,
            .a = components.r,
        };
    }
    return components;
}
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer { struct Buffer {
u64 base_address : 44; u64 base_address : 44;
@ -140,17 +50,21 @@ struct Buffer {
.b = CompSwizzle(dst_sel_z), .b = CompSwizzle(dst_sel_z),
.a = CompSwizzle(dst_sel_w), .a = CompSwizzle(dst_sel_w),
}; };
return RemapComponents(DataFormat(data_format), dst_sel); return RemapSwizzle(DataFormat(data_format), dst_sel);
} }
NumberFormat GetNumberFmt() const noexcept { NumberFormat GetNumberFmt() const noexcept {
return RemapNumberFormat(NumberFormat(num_format)); return RemapNumberFormat(NumberFormat(num_format), DataFormat(data_format));
} }
DataFormat GetDataFmt() const noexcept { DataFormat GetDataFmt() const noexcept {
return RemapDataFormat(DataFormat(data_format)); return RemapDataFormat(DataFormat(data_format));
} }
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
}
u32 GetStride() const noexcept { u32 GetStride() const noexcept {
return stride; return stride;
} }
@ -305,22 +219,22 @@ struct Image {
.b = CompSwizzle(dst_sel_z), .b = CompSwizzle(dst_sel_z),
.a = CompSwizzle(dst_sel_w), .a = CompSwizzle(dst_sel_w),
}; };
return RemapComponents(DataFormat(data_format), dst_sel); return RemapSwizzle(DataFormat(data_format), dst_sel);
} }
u32 Pitch() const { u32 Pitch() const {
return pitch + 1; return pitch + 1;
} }
u32 NumLayers(bool is_array) const { [[nodiscard]] u32 NumLayers() const noexcept {
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1; // Depth is the number of layers for Array images.
if (GetType() == ImageType::Cube) { u32 slices = depth + 1;
if (is_array) { if (GetType() == ImageType::Color3D) {
slices = last_array + 1; // Depth is the actual texture depth for 3D images.
ASSERT(slices % 6 == 0); slices = 1;
} else { } else if (IsCube()) {
slices = 6; // Depth is the number of full cubes for Cube images.
} slices *= 6;
} }
if (pow2pad) { if (pow2pad) {
slices = std::bit_ceil(slices); slices = std::bit_ceil(slices);
@ -342,8 +256,12 @@ struct Image {
return 1; return 1;
} }
bool IsCube() const noexcept {
return static_cast<ImageType>(type) == ImageType::Cube;
}
ImageType GetType() const noexcept { ImageType GetType() const noexcept {
return static_cast<ImageType>(type); return IsCube() ? ImageType::Color2DArray : static_cast<ImageType>(type);
} }
DataFormat GetDataFmt() const noexcept { DataFormat GetDataFmt() const noexcept {
@ -351,7 +269,11 @@ struct Image {
} }
NumberFormat GetNumberFmt() const noexcept { NumberFormat GetNumberFmt() const noexcept {
return RemapNumberFormat(NumberFormat(num_format)); return RemapNumberFormat(NumberFormat(num_format), DataFormat(data_format));
}
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
} }
TilingMode GetTilingMode() const { TilingMode GetTilingMode() const {
@ -371,13 +293,48 @@ struct Image {
GetDataFmt() <= DataFormat::FormatFmask64_8; GetDataFmt() <= DataFormat::FormatFmask64_8;
} }
bool IsPartialCubemap() const { [[nodiscard]] ImageType GetViewType(const bool is_array) const noexcept {
const auto viewed_slice = last_array - base_array + 1; const auto base_type = GetType();
return GetType() == ImageType::Cube && viewed_slice < 6; if (IsCube()) {
// Cube needs to remain array type regardless of instruction array specifier.
return base_type;
}
if (base_type == ImageType::Color1DArray && !is_array) {
return ImageType::Color1D;
}
if (base_type == ImageType::Color2DArray && !is_array) {
return ImageType::Color2D;
}
if (base_type == ImageType::Color2DMsaaArray && !is_array) {
return ImageType::Color2DMsaa;
}
return base_type;
} }
ImageType GetBoundType() const noexcept { [[nodiscard]] u32 NumViewLevels(const bool is_array) const noexcept {
return IsPartialCubemap() ? ImageType::Color2DArray : GetType(); switch (GetViewType(is_array)) {
case ImageType::Color2DMsaa:
case ImageType::Color2DMsaaArray:
return 1;
default:
// Constrain to actual number of available levels.
const auto max_level = std::min<u32>(last_level + 1, NumLevels());
return max_level > base_level ? max_level - base_level : 1;
}
}
[[nodiscard]] u32 NumViewLayers(const bool is_array) const noexcept {
switch (GetViewType(is_array)) {
case ImageType::Color1D:
case ImageType::Color2D:
case ImageType::Color2DMsaa:
case ImageType::Color3D:
return 1;
default:
// Constrain to actual number of available layers.
const auto max_array = std::min<u32>(last_array + 1, NumLayers());
return max_array > base_array ? max_array - base_array : 1;
}
} }
}; };
static_assert(sizeof(Image) == 32); // 256bits static_assert(sizeof(Image) == 32); // 256bits

View File

@ -5,6 +5,7 @@
#include <string_view> #include <string_view>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h" #include "common/types.h"
namespace AmdGpu { namespace AmdGpu {
@ -177,11 +178,138 @@ enum class NumberFormat : u32 {
Float = 7, Float = 7,
Srgb = 9, Srgb = 9,
Ubnorm = 10, Ubnorm = 10,
UbnromNz = 11, UbnormNz = 11,
Ubint = 12, Ubint = 12,
Ubscaled = 13, Ubscaled = 13,
}; };
/// Selector for one output component of a swizzle: either a constant (Zero/One) or one
/// of the four source channels. Values 2 and 3 are not defined; the largest value is 7,
/// so a selector fits the 3-bit fields used by CompMapping.
enum class CompSwizzle : u32 {
// Constant selectors: the output component is T(0) / T(1).
Zero = 0,
One = 1,
// Channel selectors: the output component is read from source index 0..3.
Red = 4,
Green = 5,
Blue = 6,
Alpha = 7,
};
/// Conversion chosen by MapNumberConversion for the number formats that
/// RemapNumberFormat replaces with a different format (Uscaled, Sscaled, Ubnorm).
enum class NumberConversion : u32 {
// Returned for every number format other than the three below.
None,
// Selected for NumberFormat::Uscaled (which RemapNumberFormat turns into Uint).
UintToUscaled,
// Selected for NumberFormat::Sscaled (which RemapNumberFormat turns into Sint).
SintToSscaled,
// Selected for NumberFormat::Ubnorm (which RemapNumberFormat turns into Unorm).
UnormToUbnorm,
};
struct CompMapping {
CompSwizzle r : 3;
CompSwizzle g : 3;
CompSwizzle b : 3;
CompSwizzle a : 3;
auto operator<=>(const CompMapping& other) const = default;
template <typename T>
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
return {
ApplySingle(data, r),
ApplySingle(data, g),
ApplySingle(data, b),
ApplySingle(data, a),
};
}
private:
template <typename T>
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
switch (swizzle) {
case CompSwizzle::Zero:
return T(0);
case CompSwizzle::One:
return T(1);
case CompSwizzle::Red:
return data[0];
case CompSwizzle::Green:
return data[1];
case CompSwizzle::Blue:
return data[2];
case CompSwizzle::Alpha:
return data[3];
default:
UNREACHABLE();
}
}
};
/// Substitutes the three packed formats handled here with their reversed-order
/// counterpart (11_11_10 -> 10_11_11, 10_10_10_2 -> 2_10_10_10, 5_5_5_1 -> 1_5_5_5).
/// Any other format is returned unchanged. RemapSwizzle reorders components for the
/// same three source formats.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
/// Picks the number format that is actually used in place of `format`.
/// Uscaled, Sscaled and Ubnorm become Uint, Sint and Unorm respectively (the same three
/// inputs for which MapNumberConversion reports a conversion). Float combined with
/// DataFormat::Format8 becomes Uint. Everything else passes through unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
    if (format == NumberFormat::Uscaled) {
        return NumberFormat::Uint;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberFormat::Sint;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberFormat::Unorm;
    }
    const bool is_8bit_float =
        format == NumberFormat::Float && data_format == DataFormat::Format8;
    if (is_8bit_float) {
        // An 8-bit float request may be a game reading the stencil component of a
        // depth-stencil image. Unsigned int matches the stencil format and is the
        // closest way to hand the bits through without conversion.
        return NumberFormat::Uint;
    }
    return format;
}
/// Reorders the selectors of `swizzle` for the packed formats that RemapDataFormat
/// replaces with a reversed-order counterpart; other formats keep `swizzle` as is.
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
    if (format == DataFormat::Format11_11_10) {
        // Three components: exchange the first and the third, keep green and alpha.
        return CompMapping{
            .r = swizzle.b,
            .g = swizzle.g,
            .b = swizzle.r,
            .a = swizzle.a,
        };
    }
    if (format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1) {
        // Four components: full reversal (r<->a, g<->b).
        return CompMapping{
            .r = swizzle.a,
            .g = swizzle.b,
            .b = swizzle.g,
            .a = swizzle.r,
        };
    }
    return swizzle;
}
/// Returns the conversion associated with `format`: Uscaled, Sscaled and Ubnorm each
/// have a dedicated NumberConversion value, every other format yields None.
inline NumberConversion MapNumberConversion(const NumberFormat format) {
    if (format == NumberFormat::Uscaled) {
        return NumberConversion::UintToUscaled;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberConversion::SintToSscaled;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberConversion::UnormToUbnorm;
    }
    return NumberConversion::None;
}
} // namespace AmdGpu } // namespace AmdGpu
template <> template <>

View File

@ -119,19 +119,23 @@ public:
return buffer; return buffer;
} }
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask, std::optional<vk::BufferMemoryBarrier2> GetBarrier(
vk::PipelineStageFlagBits2 dst_stage) { vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
u32 offset = 0) {
if (dst_acess_mask == access_mask && stage == dst_stage) { if (dst_acess_mask == access_mask && stage == dst_stage) {
return {}; return {};
} }
DEBUG_ASSERT(offset < size_bytes);
auto barrier = vk::BufferMemoryBarrier2{ auto barrier = vk::BufferMemoryBarrier2{
.srcStageMask = stage, .srcStageMask = stage,
.srcAccessMask = access_mask, .srcAccessMask = access_mask,
.dstStageMask = dst_stage, .dstStageMask = dst_stage,
.dstAccessMask = dst_acess_mask, .dstAccessMask = dst_acess_mask,
.buffer = buffer.buffer, .buffer = buffer.buffer,
.size = size_bytes, .offset = offset,
.size = size_bytes - offset,
}; };
access_mask = dst_acess_mask; access_mask = dst_acess_mask;
stage = dst_stage; stage = dst_stage;
@ -150,8 +154,10 @@ public:
Vulkan::Scheduler* scheduler; Vulkan::Scheduler* scheduler;
MemoryUsage usage; MemoryUsage usage;
UniqueBuffer buffer; UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; vk::Flags<vk::AccessFlagBits2> access_mask{
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
}; };
class StreamBuffer : public Buffer { class StreamBuffer : public Buffer {

View File

@ -10,13 +10,13 @@
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
namespace VideoCore { namespace VideoCore {
static constexpr size_t NumVertexBuffers = 32;
static constexpr size_t GdsBufferSize = 64_KB; static constexpr size_t GdsBufferSize = 64_KB;
static constexpr size_t StagingBufferSize = 1_GB; static constexpr size_t StagingBufferSize = 1_GB;
static constexpr size_t UboStreamBufferSize = 64_MB; static constexpr size_t UboStreamBufferSize = 64_MB;
@ -34,21 +34,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
// Ensure the first slot is used for the null buffer // Ensure the first slot is used for the null buffer
const auto null_id = const auto null_id =
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1); slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
ASSERT(null_id.index == 0); ASSERT(null_id.index == 0);
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
const vk::BufferViewCreateInfo null_view_ci = {
.buffer = null_buffer,
.format = vk::Format::eR8Unorm,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
null_buffer_view = null_view;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
} }
BufferCache::~BufferCache() = default; BufferCache::~BufferCache() = default;
@ -100,35 +89,22 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
} }
} }
bool BufferCache::BindVertexBuffers( void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) { Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes; Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings; Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
SCOPE_EXIT { pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
if (instance.IsVertexInputDynamicState()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setVertexInputEXT(bindings, attributes);
} else if (bindings.empty()) {
// Required to call bindVertexBuffers2EXT at least once in the current command buffer
// with non-null strides without a non-dynamic stride pipeline in between. Thus even
// when nothing is bound we still need to make a dummy call. Non-null strides in turn
// requires a count greater than 0.
const auto cmdbuf = scheduler.CommandBuffer();
const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer};
constexpr std::array null_offsets = {static_cast<vk::DeviceSize>(0)};
cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_offsets, null_offsets);
}
};
if (!fetch_shader || fetch_shader->attributes.empty()) { if (instance.IsVertexInputDynamicState()) {
return false; // Update current vertex inputs.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setVertexInputEXT(bindings, attributes);
} }
std::array<vk::Buffer, NumVertexBuffers> host_buffers; if (bindings.empty()) {
std::array<vk::DeviceSize, NumVertexBuffers> host_offsets; // If there are no bindings, there is nothing further to do.
std::array<vk::DeviceSize, NumVertexBuffers> host_sizes; return;
std::array<vk::DeviceSize, NumVertexBuffers> host_strides; }
boost::container::static_vector<AmdGpu::Buffer, NumVertexBuffers> guest_buffers;
struct BufferRange { struct BufferRange {
VAddr base_address; VAddr base_address;
@ -136,61 +112,37 @@ bool BufferCache::BindVertexBuffers(
vk::Buffer vk_buffer; vk::Buffer vk_buffer;
u64 offset; u64 offset;
size_t GetSize() const { [[nodiscard]] size_t GetSize() const {
return end_address - base_address; return end_address - base_address;
} }
}; };
// Calculate buffers memory overlaps // Build list of ranges covering the requested buffers
bool has_step_rate = false; Vulkan::VertexInputs<BufferRange> ranges{};
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{}; for (const auto& buffer : guest_buffers) {
for (const auto& attrib : fetch_shader->attributes) { if (buffer.GetSize() > 0) {
if (attrib.UsesStepRates()) { ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
has_step_rate = true;
continue;
} }
}
const auto& buffer = attrib.GetSharp(vs_info); // Merge connecting ranges together
if (buffer.GetSize() == 0) { Vulkan::VertexInputs<BufferRange> ranges_merged{};
continue; if (!ranges.empty()) {
} std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
guest_buffers.emplace_back(buffer); return lhv.base_address < rhv.base_address;
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
attributes.push_back({
.location = attrib.semantic,
.binding = attrib.semantic,
.format =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
}); });
bindings.push_back({ ranges_merged.emplace_back(ranges[0]);
.binding = attrib.semantic, for (auto range : ranges) {
.stride = buffer.GetStride(), auto& prev_range = ranges_merged.back();
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None if (prev_range.end_address < range.base_address) {
? vk::VertexInputRate::eVertex ranges_merged.emplace_back(range);
: vk::VertexInputRate::eInstance, } else {
.divisor = 1, prev_range.end_address = std::max(prev_range.end_address, range.end_address);
}); }
}
if (ranges.empty()) {
return false;
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
return lhv.base_address < rhv.base_address;
});
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges_merged{ranges[0]};
for (auto range : ranges) {
auto& prev_range = ranges_merged.back();
if (prev_range.end_address < range.base_address) {
ranges_merged.emplace_back(range);
} else {
prev_range.end_address = std::max(prev_range.end_address, range.end_address);
} }
} }
// Map buffers // Map buffers for merged ranges
for (auto& range : ranges_merged) { for (auto& range : ranges_merged) {
const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false); const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false);
range.vk_buffer = buffer->buffer; range.vk_buffer = buffer->buffer;
@ -198,32 +150,39 @@ bool BufferCache::BindVertexBuffers(
} }
// Bind vertex buffers // Bind vertex buffers
const size_t num_buffers = guest_buffers.size(); Vulkan::VertexInputs<vk::Buffer> host_buffers;
for (u32 i = 0; i < num_buffers; ++i) { Vulkan::VertexInputs<vk::DeviceSize> host_offsets;
const auto& buffer = guest_buffers[i]; Vulkan::VertexInputs<vk::DeviceSize> host_sizes;
const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) { Vulkan::VertexInputs<vk::DeviceSize> host_strides;
return (buffer.base_address >= range.base_address && const auto null_buffer =
buffer.base_address < range.end_address); instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : GetBuffer(NULL_BUFFER_ID).Handle();
}); for (const auto& buffer : guest_buffers) {
ASSERT(host_buffer != ranges_merged.cend()); if (buffer.GetSize() > 0) {
const auto host_buffer_info =
host_buffers[i] = host_buffer->vk_buffer; std::ranges::find_if(ranges_merged, [&](const BufferRange& range) {
host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; return buffer.base_address >= range.base_address &&
host_sizes[i] = buffer.GetSize(); buffer.base_address < range.end_address;
host_strides[i] = buffer.GetStride(); });
} ASSERT(host_buffer_info != ranges_merged.cend());
host_buffers.emplace_back(host_buffer_info->vk_buffer);
if (num_buffers > 0) { host_offsets.push_back(host_buffer_info->offset + buffer.base_address -
const auto cmdbuf = scheduler.CommandBuffer(); host_buffer_info->base_address);
if (instance.IsVertexInputDynamicState()) {
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
} else { } else {
cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), host_buffers.emplace_back(null_buffer);
host_sizes.data(), host_strides.data()); host_offsets.push_back(0);
} }
host_sizes.push_back(buffer.GetSize());
host_strides.push_back(buffer.GetStride());
} }
return has_step_rate; const auto cmdbuf = scheduler.CommandBuffer();
const auto num_buffers = guest_buffers.size();
if (instance.IsVertexInputDynamicState()) {
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
} else {
cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
host_sizes.data(), host_strides.data());
}
} }
void BufferCache::BindIndexBuffer(u32 index_offset) { void BufferCache::BindIndexBuffer(u32 index_offset) {
@ -479,43 +438,36 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
}; };
scheduler.EndRendering(); scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
const std::array pre_barriers = {
vk::BufferMemoryBarrier2{ boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, vk::PipelineStageFlagBits2::eTransfer)) {
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer, pre_barriers.push_back(*src_barrier);
.dstAccessMask = vk::AccessFlagBits2::eTransferRead, }
.buffer = overlap.Handle(), if (auto dst_barrier =
.offset = 0, new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
.size = overlap.SizeBytes(), vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
}, pre_barriers.push_back(*dst_barrier);
}; }
const std::array post_barriers = {
vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.buffer = overlap.Handle(),
.offset = 0,
.size = overlap.SizeBytes(),
},
vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = new_buffer.Handle(),
.offset = dst_base_offset,
.size = overlap.SizeBytes(),
},
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{ cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1, .bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
.pBufferMemoryBarriers = pre_barriers.data(), .pBufferMemoryBarriers = pre_barriers.data(),
}); });
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy); cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
if (auto src_barrier =
overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
vk::PipelineStageFlagBits2::eAllCommands)) {
post_barriers.push_back(*src_barrier);
}
if (auto dst_barrier = new_buffer.GetBarrier(
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
post_barriers.push_back(*dst_barrier);
}
cmdbuf.pipelineBarrier2(vk::DependencyInfo{ cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()), .bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
@ -626,7 +578,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
const vk::BufferMemoryBarrier2 pre_barrier = { const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead, .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer, .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(), .buffer = buffer.Handle(),

View File

@ -5,8 +5,6 @@
#include <shared_mutex> #include <shared_mutex>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/slot_vector.h" #include "common/slot_vector.h"
#include "common/types.h" #include "common/types.h"
@ -26,6 +24,10 @@ struct FetchShaderData;
struct Info; struct Info;
} // namespace Shader } // namespace Shader
namespace Vulkan {
class GraphicsPipeline;
}
namespace VideoCore { namespace VideoCore {
using BufferId = Common::SlotId; using BufferId = Common::SlotId;
@ -71,16 +73,11 @@ public:
return slot_buffers[id]; return slot_buffers[id];
} }
[[nodiscard]] vk::BufferView& NullBufferView() {
return null_buffer_view;
}
/// Invalidates any buffer in the logical page range. /// Invalidates any buffer in the logical page range.
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
/// Binds host vertex buffers for the current draw. /// Binds host vertex buffers for the current draw.
bool BindVertexBuffers(const Shader::Info& vs_info, void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
/// Bind host index buffer for the current draw. /// Bind host index buffer for the current draw.
void BindIndexBuffer(u32 index_offset); void BindIndexBuffer(u32 index_offset);
@ -160,7 +157,6 @@ private:
std::shared_mutex mutex; std::shared_mutex mutex;
Common::SlotVector<Buffer> slot_buffers; Common::SlotVector<Buffer> slot_buffers;
RangeSet gpu_modified_ranges; RangeSet gpu_modified_ranges;
vk::BufferView null_buffer_view;
MemoryTracker memory_tracker; MemoryTracker memory_tracker;
PageTable page_table; PageTable page_table;
}; };

View File

@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
case AmdGpu::NumberFormat::Srgb: case AmdGpu::NumberFormat::Srgb:
return ImageRead | Mrt; return ImageRead | Mrt;
case AmdGpu::NumberFormat::Ubnorm: case AmdGpu::NumberFormat::Ubnorm:
case AmdGpu::NumberFormat::UbnromNz: case AmdGpu::NumberFormat::UbnormNz:
case AmdGpu::NumberFormat::Ubint: case AmdGpu::NumberFormat::Ubint:
case AmdGpu::NumberFormat::Ubscaled: case AmdGpu::NumberFormat::Ubscaled:
return ImageRead; return ImageRead;
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
} }
std::span<const SurfaceFormatInfo> SurfaceFormats() { std::span<const SurfaceFormatInfo> SurfaceFormats() {
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
static constexpr std::array formats{ static constexpr std::array formats{
// Invalid // Invalid
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
vk::Format::eUndefined), vk::Format::eUndefined),
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8Unorm), vk::Format::eR8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8Snorm), vk::Format::eR8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8Uint), vk::Format::eR8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16Unorm), vk::Format::eR16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16Snorm), vk::Format::eR16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16Uint), vk::Format::eR16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8G8Unorm), vk::Format::eR8G8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8G8Snorm), vk::Format::eR8G8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8G8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8G8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8G8Uint), vk::Format::eR8G8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16G16Unorm), vk::Format::eR16G16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16G16Snorm), vk::Format::eR16G16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR16G16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR16G16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16G16Uint), vk::Format::eR16G16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eA2B10G10R10UnormPack32), vk::Format::eA2B10G10R10UnormPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
vk::Format::eA2B10G10R10SnormPack32), vk::Format::eA2B10G10R10SnormPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
vk::Format::eA2B10G10R10UscaledPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
vk::Format::eA2B10G10R10SscaledPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
vk::Format::eA2B10G10R10UintPack32), vk::Format::eA2B10G10R10UintPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8G8B8A8Unorm), vk::Format::eR8G8B8A8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8G8B8A8Snorm), vk::Format::eR8G8B8A8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8G8B8A8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8G8B8A8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8G8B8A8Uint), vk::Format::eR8G8B8A8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16G16B16A16Unorm), vk::Format::eR16G16B16A16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16G16B16A16Snorm), vk::Format::eR16G16B16A16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16G16B16A16Uint), vk::Format::eR16G16B16A16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,

View File

@ -18,6 +18,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
auto& info = stages[int(Shader::LogicalStage::Compute)]; auto& info = stages[int(Shader::LogicalStage::Compute)];
info = &info_; info = &info_;
const auto debug_str = GetDebugString();
const vk::PipelineShaderStageCreateInfo shader_ci = { const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute, .stage = vk::ShaderStageFlagBits::eCompute,
@ -58,9 +59,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
for (const auto& image : info->images) { for (const auto& image : info->images) {
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = image.IsStorage(image.GetSharp(*info)) .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
}); });
@ -89,8 +89,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.bindingCount = static_cast<u32>(bindings.size()), .bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(), .pBindings = bindings.data(),
}; };
const auto device = instance.GetDevice();
auto [descriptor_set_result, descriptor_set] = auto [descriptor_set_result, descriptor_set] =
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); device.createDescriptorSetLayoutUnique(desc_layout_ci);
ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess, ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess,
"Failed to create compute descriptor set layout: {}", "Failed to create compute descriptor set layout: {}",
vk::to_string(descriptor_set_result)); vk::to_string(descriptor_set_result));
@ -107,6 +108,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ASSERT_MSG(layout_result == vk::Result::eSuccess, ASSERT_MSG(layout_result == vk::Result::eSuccess,
"Failed to create compute pipeline layout: {}", vk::to_string(layout_result)); "Failed to create compute pipeline layout: {}", vk::to_string(layout_result));
pipeline_layout = std::move(layout); pipeline_layout = std::move(layout);
SetObjectName(device, *pipeline_layout, "Compute PipelineLayout {}", debug_str);
const vk::ComputePipelineCreateInfo compute_pipeline_ci = { const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
.stage = shader_ci, .stage = shader_ci,
@ -117,6 +119,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}", ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
vk::to_string(pipeline_result)); vk::to_string(pipeline_result));
pipeline = std::move(pipe); pipeline = std::move(pipe);
SetObjectName(device, *pipeline, "Compute Pipeline {}", debug_str);
} }
ComputePipeline::~ComputePipeline() = default; ComputePipeline::~ComputePipeline() = default;

View File

@ -8,7 +8,6 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h" #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
@ -16,6 +15,7 @@
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -36,6 +36,7 @@ GraphicsPipeline::GraphicsPipeline(
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
BuildDescSetLayout(); BuildDescSetLayout();
const auto debug_str = GetDebugString();
const vk::PushConstantRange push_constants = { const vk::PushConstantRange push_constants = {
.stageFlags = gp_stage_flags, .stageFlags = gp_stage_flags,
@ -54,36 +55,13 @@ GraphicsPipeline::GraphicsPipeline(
ASSERT_MSG(layout_result == vk::Result::eSuccess, ASSERT_MSG(layout_result == vk::Result::eSuccess,
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result)); "Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
pipeline_layout = std::move(layout); pipeline_layout = std::move(layout);
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings; VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes; VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
if (fetch_shader && !instance.IsVertexInputDynamicState()) { VertexInputs<AmdGpu::Buffer> guest_buffers;
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); if (!instance.IsVertexInputDynamicState()) {
for (const auto& attrib : fetch_shader->attributes) { GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader
continue;
}
const auto buffer = attrib.GetSharp(vs_info);
if (buffer.GetSize() == 0) {
continue;
}
vertex_attributes.push_back({
.location = attrib.semantic,
.binding = attrib.semantic,
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
vertex_bindings.push_back({
.binding = attrib.semantic,
.stride = buffer.GetStride(),
.inputRate =
attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
});
}
} }
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
@ -159,7 +137,7 @@ GraphicsPipeline::GraphicsPipeline(
} }
if (instance.IsVertexInputDynamicState()) { if (instance.IsVertexInputDynamicState()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
} else { } else if (!vertex_bindings.empty()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT); dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT);
} }
@ -322,10 +300,56 @@ GraphicsPipeline::GraphicsPipeline(
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}", ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
vk::to_string(pipeline_result)); vk::to_string(pipeline_result));
pipeline = std::move(pipe); pipeline = std::move(pipe);
SetObjectName(device, *pipeline, "Graphics Pipeline {}", debug_str);
} }
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;
/// Collects the vertex input state described by the fetch shader.
///
/// For every fetch shader attribute that does not use step rates, one attribute
/// description, one binding description and the guest buffer they were derived
/// from are appended, so the three containers receive the same number of entries.
/// Nothing is appended when there is no fetch shader or it has no attributes.
template <typename Attribute, typename Binding>
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
                                       VertexInputs<Binding>& bindings,
                                       VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
    const bool has_attributes = fetch_shader && !fetch_shader->attributes.empty();
    if (!has_attributes) {
        return;
    }

    const auto& vertex_stage = GetStage(Shader::LogicalStage::Vertex);
    for (const auto& input : fetch_shader->attributes) {
        if (input.UsesStepRates()) {
            // The shader pulls this data itself, so no attribute binding is emitted.
            continue;
        }

        const auto& sharp = input.GetSharp(vertex_stage);
        attributes.push_back(Attribute{
            .location = input.semantic,
            .binding = input.semantic,
            .format = LiverpoolToVK::SurfaceFormat(sharp.GetDataFmt(), sharp.GetNumberFmt()),
            .offset = 0,
        });

        Binding binding_desc{
            .binding = input.semantic,
            .stride = sharp.GetStride(),
            .inputRate = input.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
                             ? vk::VertexInputRate::eVertex
                             : vk::VertexInputRate::eInstance,
        };
        // Only the 2EXT binding description carries a divisor member.
        if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
            binding_desc.divisor = 1;
        }
        bindings.push_back(binding_desc);

        guest_buffers.emplace_back(sharp);
    }
}
// Explicitly instantiate GetVertexInputs for the two attribute/binding type pairs it is
// written for: the plain Vulkan vertex input descriptions and their 2EXT counterparts.
// The template body is defined in this source file, so these instantiations are what make
// the two specializations available to code that only sees the declaration.
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
VertexInputs<vk::VertexInputBindingDescription>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
void GraphicsPipeline::BuildDescSetLayout() { void GraphicsPipeline::BuildDescSetLayout() {
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings; boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
u32 binding{}; u32 binding{};
@ -364,9 +388,8 @@ void GraphicsPipeline::BuildDescSetLayout() {
for (const auto& image : stage->images) { for (const auto& image : stage->images) {
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = image.IsStorage(image.GetSharp(*stage)) .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = gp_stage_flags, .stageFlags = gp_stage_flags,
}); });

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <boost/container/static_vector.hpp>
#include <xxhash.h> #include <xxhash.h>
#include "common/types.h" #include "common/types.h"
@ -27,11 +28,15 @@ class DescriptorHeap;
using Liverpool = AmdGpu::Liverpool; using Liverpool = AmdGpu::Liverpool;
/// Fixed-capacity vector (boost static_vector) with room for MaxVertexBufferCount elements.
/// Used to carry vertex input attributes, bindings and guest buffers.
template <typename T>
using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
struct GraphicsPipelineKey { struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes; std::array<size_t, MaxShaderStages> stage_hashes;
u32 num_color_attachments; u32 num_color_attachments;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats; std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats; std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles; std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
vk::Format depth_format; vk::Format depth_format;
vk::Format stencil_format; vk::Format stencil_format;
@ -99,6 +104,11 @@ public:
key.prim_type == AmdGpu::PrimitiveType::QuadList; key.prim_type == AmdGpu::PrimitiveType::QuadList;
} }
/// Gets the attributes and bindings for vertex inputs.
/// Appends one entry to each of `attributes`, `bindings` and `guest_buffers` for every
/// fetch shader attribute that does not use step rates. All three containers are left
/// untouched when the pipeline has no fetch shader or the fetch shader has no attributes.
template <typename Attribute, typename Binding>
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
private: private:
void BuildDescSetLayout(); void BuildDescSetLayout();

View File

@ -271,6 +271,7 @@ bool Instance::CreateDevice() {
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions. // with extensions.

View File

@ -159,6 +159,11 @@ public:
return image_load_store_lod; return image_load_store_lod;
} }
/// Returns true when VK_AMD_gcn_shader is supported.
/// The flag holds the result of requesting VK_AMD_GCN_SHADER_EXTENSION_NAME through
/// add_extension in Instance::CreateDevice.
bool IsAmdGcnShaderSupported() const {
return amd_gcn_shader;
}
/// Returns true when geometry shaders are supported by the device /// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const { bool IsGeometryStageSupported() const {
return features.geometryShader; return features.geometryShader;
@ -334,6 +339,7 @@ private:
bool list_restart{}; bool list_restart{};
bool legacy_vertex_attributes{}; bool legacy_vertex_attributes{};
bool image_load_store_lod{}; bool image_load_store_lod{};
bool amd_gcn_shader{};
u64 min_imported_host_pointer_alignment{}; u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{}; u32 subgroup_size{};
bool tooling_info{}; bool tooling_info{};

View File

@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
info.fs_info.color_buffers[i] = { info.fs_info.color_buffers[i] = {
.num_format = graphics_key.color_num_formats[i], .num_format = graphics_key.color_num_formats[i],
.num_conversion = graphics_key.color_num_conversions[i],
.swizzle = graphics_key.color_swizzles[i], .swizzle = graphics_key.color_swizzles[i],
}; };
} }
@ -203,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_explicit_workgroup_layout = true, .support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
@ -302,6 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.num_color_attachments = 0; key.num_color_attachments = 0;
key.color_formats.fill(vk::Format::eUndefined); key.color_formats.fill(vk::Format::eUndefined);
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
key.blend_controls.fill({}); key.blend_controls.fill({});
key.write_masks.fill({}); key.write_masks.fill({});
key.color_swizzles.fill({}); key.color_swizzles.fill({});
@ -330,6 +333,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.color_formats[remapped_cb] = key.color_formats[remapped_cb] =
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt()); LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt(); key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
key.color_swizzles[remapped_cb] = col_buf.Swizzle(); key.color_swizzles[remapped_cb] = col_buf.Swizzle();
} }
@ -416,17 +420,17 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
} }
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)]; const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
// Without vertex input dynamic state, the pipeline needs to specialize on format.
// Stride will still be handled outside the pipeline using dynamic state.
u32 vertex_binding = 0; u32 vertex_binding = 0;
for (const auto& attrib : fetch_shader->attributes) { for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) { if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader.
continue; continue;
} }
const auto& buffer = attrib.GetSharp(*vs_info); const auto& buffer = attrib.GetSharp(*vs_info);
if (buffer.GetSize() == 0) {
continue;
}
ASSERT(vertex_binding < MaxVertexBufferCount); ASSERT(vertex_binding < MaxVertexBufferCount);
key.vertex_buffer_formats[vertex_binding++] = key.vertex_buffer_formats[vertex_binding++] =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()); Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());

View File

@ -6,6 +6,7 @@
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -55,4 +56,19 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {}); cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
} }
std::string Pipeline::GetDebugString() const {
std::string stage_desc;
for (const auto& stage : stages) {
if (stage) {
const auto shader_name = PipelineCache::GetShaderName(stage->stage, stage->pgm_hash);
if (stage_desc.empty()) {
stage_desc = shader_name;
} else {
stage_desc = fmt::format("{},{}", stage_desc, shader_name);
}
}
}
return stage_desc;
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -61,6 +61,8 @@ public:
const Shader::PushData& push_data) const; const Shader::PushData& push_data) const;
protected: protected:
[[nodiscard]] std::string GetDebugString() const;
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
DescriptorHeap& desc_heap; DescriptorHeap& desc_heap;

View File

@ -248,9 +248,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return; return;
} }
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); buffer_cache.BindVertexBuffers(*pipeline);
const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
if (is_indexed) { if (is_indexed) {
buffer_cache.BindIndexBuffer(index_offset); buffer_cache.BindIndexBuffer(index_offset);
} }
@ -258,6 +256,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
BeginRendering(*pipeline, state); BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline); UpdateDynamicState(*pipeline);
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
const auto& fetch_shader = pipeline->GetFetchShader();
const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader); const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader);
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
@ -292,9 +292,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
return; return;
} }
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); buffer_cache.BindVertexBuffers(*pipeline);
const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
if (is_indexed) { if (is_indexed) {
buffer_cache.BindIndexBuffer(0); buffer_cache.BindIndexBuffer(0);
} }
@ -537,6 +535,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
} }
// Second pass to re-bind buffers that were updated after binding // Second pass to re-bind buffers that were updated after binding
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
for (u32 i = 0; i < buffer_bindings.size(); i++) { for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = buffer_bindings[i]; const auto& [buffer_id, vsharp] = buffer_bindings[i];
const auto& desc = stage.buffers[i]; const auto& desc = stage.buffers[i];
@ -548,7 +547,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
} else if (instance.IsNullDescriptorSupported()) { } else if (instance.IsNullDescriptorSupported()) {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
} else { } else {
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
} }
} else { } else {
@ -582,17 +580,19 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
++binding.buffer; ++binding.buffer;
} }
const auto null_buffer_view =
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
for (u32 i = 0; i < texbuffer_bindings.size(); i++) { for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = texbuffer_bindings[i]; const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
const auto& desc = stage.texture_buffers[i]; const auto& desc = stage.texture_buffers[i];
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); // Fallback format for null buffer view; never used in valid buffer case.
const auto data_fmt = vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid
? vsharp.GetDataFmt()
: AmdGpu::DataFormat::Format8;
const u32 fmt_stride = AmdGpu::NumBits(data_fmt) >> 3;
vk::BufferView buffer_view;
if (buffer_id) { if (buffer_id) {
const u32 alignment = instance.TexelBufferMinAlignment(); const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
const u32 buf_stride = vsharp.GetStride(); const u32 buf_stride = vsharp.GetStride();
ASSERT_MSG(buf_stride % fmt_stride == 0, ASSERT_MSG(buf_stride % fmt_stride == 0,
"Texel buffer stride must match format stride"); "Texel buffer stride must match format stride");
@ -600,9 +600,8 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
const u32 adjust = offset - offset_aligned; const u32 adjust = offset - offset_aligned;
ASSERT(adjust % fmt_stride == 0); ASSERT(adjust % fmt_stride == 0);
push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride); push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
buffer_view = buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, desc.is_written, data_fmt, vsharp.GetNumberFmt());
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
if (auto barrier = if (auto barrier =
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead, : vk::AccessFlagBits2::eShaderRead,
@ -612,6 +611,11 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
if (desc.is_written) { if (desc.is_written) {
texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize()); texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
} }
} else if (instance.IsNullDescriptorSupported()) {
buffer_view = VK_NULL_HANDLE;
} else {
buffer_view =
null_buffer.View(0, fmt_stride, desc.is_written, data_fmt, vsharp.GetNumberFmt());
} }
set_writes.push_back({ set_writes.push_back({
@ -621,7 +625,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer, : vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view, .pTexelBufferView = &buffer_views.emplace_back(buffer_view),
}); });
++binding.buffer; ++binding.buffer;
} }
@ -655,7 +659,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (image->binding.is_bound) { if (image->binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we need // The image is already bound. In case if it is about to be used as storage we need
// to force general layout on it. // to force general layout on it.
image->binding.force_general |= image_desc.IsStorage(tsharp); image->binding.force_general |= image_desc.is_written;
} }
if (image->binding.is_target) { if (image->binding.is_target) {
// The image is already bound as target. Since we read and output to it need to force // The image is already bound as target. Since we read and output to it need to force

View File

@ -153,7 +153,8 @@ vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
} }
// The pool has run out. Record current tick and place it in pending list. // The pool has run out. Record current tick and place it in pending list.
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory, ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory ||
result == vk::Result::eErrorFragmentedPool,
"Unexpected error during descriptor set allocation {}", vk::to_string(result)); "Unexpected error during descriptor set allocation {}", vk::to_string(result));
pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick()); pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) { if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {

Some files were not shown because too many files have changed in this diff Show More