Merge branch 'shadps4-emu:main' into audio3d

This commit is contained in:
Lizardy 2024-09-08 05:34:13 -04:00 committed by GitHub
commit d3b07f8ac7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
57 changed files with 1910 additions and 492 deletions

View File

@ -25,8 +25,27 @@ jobs:
run: > run: >
sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential qt6-base-dev qt6-tools-dev sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential qt6-base-dev qt6-tools-dev
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake dependency build objects
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-builds
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build - name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel
@ -44,4 +63,4 @@ jobs:
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: shadps4-linux-qt-${{ steps.vars.outputs.date }}-${{ steps.vars.outputs.shorthash }} name: shadps4-linux-qt-${{ steps.vars.outputs.date }}-${{ steps.vars.outputs.shorthash }}
path: Shadps4-qt.AppImage path: Shadps4-qt.AppImage

View File

@ -25,8 +25,27 @@ jobs:
run: > run: >
sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake dependency build objects
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-builds
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build - name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel

View File

@ -40,8 +40,29 @@ jobs:
arch: clang_64 arch: clang_64
archives: qtbase qttools archives: qtbase qttools
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake dependency build objects
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{runner.os}}-qt-cache-cmake-dependency-builds
with:
append-timestamp: false
create-symlink: true
key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
variant: sccache
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build - name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)

View File

@ -31,8 +31,29 @@ jobs:
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/bin/brew install molten-vk arch -x86_64 /usr/local/bin/brew install molten-vk
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake dependency build objects
uses: hendrikmuhs/ccache-action@v1.2.14
env:
cache-name: ${{runner.os}}-sdl-cache-cmake-dependency-builds
with:
append-timestamp: false
create-symlink: true
key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
variant: sccache
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build - name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)

View File

@ -30,6 +30,17 @@ jobs:
arch: win64_msvc2019_64 arch: win64_msvc2019_64
archives: qtbase qttools archives: qtbase qttools
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL -DENABLE_QT_GUI=ON run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL -DENABLE_QT_GUI=ON

View File

@ -20,6 +20,17 @@ jobs:
with: with:
submodules: recursive submodules: recursive
- name: Cache CMake dependency source code
uses: actions/cache@v4
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Configure CMake - name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL

View File

@ -159,6 +159,20 @@ Open a PR and we'll check it :)
<img src="https://contrib.rocks/image?repo=shadps4-emu/shadPS4&max=15"> <img src="https://contrib.rocks/image?repo=shadps4-emu/shadPS4&max=15">
</a> </a>
# Special Thanks
A few noteworthy teams/projects who've helped us along the way are:
- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. They have been incredibly helpful in understanding and solving problems that came up from natively executing the x64 code of PS4 binaries
- [**fpPS4**](https://github.com/red-prig/fpPS4): The fpPS4 team has assisted massively with understanding some of the more complex parts of the PS4 operating system and libraries, by helping with reverse engineering work and research.
- **yuzu**: Our shader compiler has been designed with yuzu's Hades compiler as a blueprint. This allowed us to focus on the challenges of emulating a modern AMD GPU while having a high-quality optimizing shader compiler implementation as a base.
- [**hydra**](https://github.com/hydra-emu/hydra): A multisystem, multiplatform emulator (chip-8, GB, NES, N64) from Paris.
# Sister Projects # Sister Projects
- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. - [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat.

View File

@ -315,14 +315,12 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
SaveRegisters(c, {scratch}); SaveRegisters(c, {scratch});
// BLSI sets CF to zero if source is zero, otherwise it sets CF to one. // BLSI sets CF to zero if source is zero, otherwise it sets CF to one.
Xbyak::Label set_carry, clear_carry, end; Xbyak::Label clear_carry, end;
c.mov(scratch, *src); c.mov(scratch, *src);
c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise
c.jc(set_carry); c.jnc(clear_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.and_(scratch, *src); c.and_(scratch, *src);
c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF
c.jmp(end); c.jmp(end);
@ -345,15 +343,13 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener
SaveRegisters(c, {scratch}); SaveRegisters(c, {scratch});
Xbyak::Label set_carry, clear_carry, end; Xbyak::Label clear_carry, end;
// BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one. // BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one.
c.mov(scratch, *src); c.mov(scratch, *src);
c.test(scratch, scratch); c.test(scratch, scratch);
c.jz(set_carry); c.jnz(clear_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.dec(scratch); c.dec(scratch);
c.xor_(scratch, *src); c.xor_(scratch, *src);
c.stc(); c.stc();
@ -378,15 +374,13 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
SaveRegisters(c, {scratch}); SaveRegisters(c, {scratch});
Xbyak::Label set_carry, clear_carry, end; Xbyak::Label clear_carry, end;
// BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one. // BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one.
c.mov(scratch, *src); c.mov(scratch, *src);
c.test(scratch, scratch); c.test(scratch, scratch);
c.jz(set_carry); c.jnz(clear_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.dec(scratch); c.dec(scratch);
c.and_(scratch, *src); c.and_(scratch, *src);
c.stc(); c.stc();

View File

@ -8,7 +8,7 @@
namespace Libraries::Kernel { namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelIsNeoMode() { int PS4_SYSV_ABI sceKernelIsNeoMode() {
LOG_INFO(Kernel_Sce, "called"); LOG_DEBUG(Kernel_Sce, "called");
return Config::isNeoMode(); return Config::isNeoMode();
} }

View File

@ -78,7 +78,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) { int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) {
LOG_INFO(Kernel_Event, "called"); LOG_DEBUG(Kernel_Event, "called");
ef->Clear(bitPattern); ef->Clear(bitPattern);
return ORBIS_OK; return ORBIS_OK;
} }
@ -97,7 +97,7 @@ int PS4_SYSV_ABI sceKernelSetEventFlag(OrbisKernelEventFlag ef, u64 bitPattern)
} }
int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, u32 waitMode, int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, u32 waitMode,
u64* pResultPat) { u64* pResultPat) {
LOG_INFO(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode); LOG_DEBUG(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode);
if (ef == nullptr) { if (ef == nullptr) {
return ORBIS_KERNEL_ERROR_ESRCH; return ORBIS_KERNEL_ERROR_ESRCH;

View File

@ -33,6 +33,8 @@ typedef struct {
} OrbisKernelUuid; } OrbisKernelUuid;
int* PS4_SYSV_ABI __Error(); int* PS4_SYSV_ABI __Error();
int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time,
struct OrbisTimesec* st, unsigned long* dst_sec);
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver); int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver);
void LibKernel_Register(Core::Loader::SymbolsResolver* sym); void LibKernel_Register(Core::Loader::SymbolsResolver* sym);

View File

@ -295,7 +295,7 @@ ScePthread PS4_SYSV_ABI scePthreadSelf() {
int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr,
const /*SceKernelCpumask*/ u64 mask) { const /*SceKernelCpumask*/ u64 mask) {
LOG_INFO(Kernel_Pthread, "called"); LOG_DEBUG(Kernel_Pthread, "called");
if (pattr == nullptr || *pattr == nullptr) { if (pattr == nullptr || *pattr == nullptr) {
return SCE_KERNEL_ERROR_EINVAL; return SCE_KERNEL_ERROR_EINVAL;
@ -387,7 +387,7 @@ int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t st
} }
int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask) { int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask) {
LOG_INFO(Kernel_Pthread, "called"); LOG_DEBUG(Kernel_Pthread, "called");
if (thread == nullptr) { if (thread == nullptr) {
return SCE_KERNEL_ERROR_ESRCH; return SCE_KERNEL_ERROR_ESRCH;

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include <sys/types.h>
#include "common/types.h" #include "common/types.h"
namespace Core::Loader { namespace Core::Loader {
@ -50,7 +52,10 @@ u64 PS4_SYSV_ABI sceKernelGetProcessTime();
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter();
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency();
u64 PS4_SYSV_ABI sceKernelReadTsc(); u64 PS4_SYSV_ABI sceKernelReadTsc();
int PS4_SYSV_ABI sceKernelClockGettime(s32 clock_id, OrbisKernelTimespec* tp);
s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz);
int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds,
OrbisKernelTimezone* timezone, int* dst_seconds);
void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym); void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

File diff suppressed because it is too large Load Diff

View File

@ -11,57 +11,81 @@ class SymbolsResolver;
namespace Libraries::Rtc { namespace Libraries::Rtc {
constexpr int ORBIS_RTC_DAYOFWEEK_SUNDAY = 0;
constexpr int ORBIS_RTC_DAYOFWEEK_MONDAY = 1;
constexpr int ORBIS_RTC_DAYOFWEEK_TUESDAY = 2;
constexpr int ORBIS_RTC_DAYOFWEEK_WEDNESDAY = 3;
constexpr int ORBIS_RTC_DAYOFWEEK_THURSDAY = 4;
constexpr int ORBIS_RTC_DAYOFWEEK_FRIDAY = 5;
constexpr int ORBIS_RTC_DAYOFWEEK_SATURDAY = 6;
constexpr int64_t UNIX_EPOCH_TICKS = 0xdcbffeff2bc000;
constexpr int64_t WIN32_FILETIME_EPOCH_TICKS = 0xb36168b6a58000;
struct OrbisRtcTick { struct OrbisRtcTick {
u64 tick; uint64_t tick;
}; };
int PS4_SYSV_ABI sceRtcCheckValid(); struct OrbisRtcDateTime {
int PS4_SYSV_ABI sceRtcCompareTick(); uint16_t year;
int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(); uint16_t month;
int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(); uint16_t day;
uint16_t hour;
uint16_t minute;
uint16_t second;
uint32_t microsecond;
};
int PS4_SYSV_ABI sceRtcCheckValid(OrbisRtcDateTime* pTime);
int PS4_SYSV_ABI sceRtcCompareTick(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2);
int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(OrbisRtcTick* pTickLocal, OrbisRtcTick* pTickUtc);
int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(OrbisRtcTick* pTickUtc, OrbisRtcTick* pTickLocal);
int PS4_SYSV_ABI sceRtcEnd(); int PS4_SYSV_ABI sceRtcEnd();
int PS4_SYSV_ABI sceRtcFormatRFC2822(); int PS4_SYSV_ABI sceRtcFormatRFC2822(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes);
int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(); int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc);
int PS4_SYSV_ABI sceRtcFormatRFC3339(); int PS4_SYSV_ABI sceRtcFormatRFC3339(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes);
int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(); int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc);
int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(); int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(char* pszDateTime, const OrbisRtcTick* pTickUtc,
int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(); int minutes);
int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(); int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(char* pszDateTime,
int PS4_SYSV_ABI sceRtcGetCurrentClock(); const OrbisRtcTick* pTickUtc);
int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(); int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(); int PS4_SYSV_ABI sceRtcGetCurrentClock(OrbisRtcDateTime* pTime, int timeZone);
int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(); int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(OrbisRtcDateTime* pTime);
int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(); int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick); int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetDayOfWeek(); int PS4_SYSV_ABI sceRtcGetDayOfWeek(int year, int month, int day);
int PS4_SYSV_ABI sceRtcGetDaysInMonth(); int PS4_SYSV_ABI sceRtcGetDaysInMonth(int year, int month);
int PS4_SYSV_ABI sceRtcGetDosTime(); int PS4_SYSV_ABI sceRtcGetDosTime(OrbisRtcDateTime* pTime, unsigned int* dosTime);
int PS4_SYSV_ABI sceRtcGetTick(); int PS4_SYSV_ABI sceRtcGetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcGetTickResolution(); unsigned int PS4_SYSV_ABI sceRtcGetTickResolution();
int PS4_SYSV_ABI sceRtcGetTime_t(); int PS4_SYSV_ABI sceRtcGetTime_t(OrbisRtcDateTime* pTime, time_t* llTime);
int PS4_SYSV_ABI sceRtcGetWin32FileTime(); int PS4_SYSV_ABI sceRtcGetWin32FileTime(OrbisRtcDateTime* pTime, uint64_t* ulWin32Time);
int PS4_SYSV_ABI sceRtcInit(); int PS4_SYSV_ABI sceRtcInit();
int PS4_SYSV_ABI sceRtcIsLeapYear(); int PS4_SYSV_ABI sceRtcIsLeapYear(int yearInt);
int PS4_SYSV_ABI sceRtcParseDateTime(); int PS4_SYSV_ABI sceRtcParseDateTime(OrbisRtcTick* pTickUtc, const char* pszDateTime);
int PS4_SYSV_ABI sceRtcParseRFC3339(); int PS4_SYSV_ABI sceRtcParseRFC3339(OrbisRtcTick* pTickUtc, const char* pszDateTime);
int PS4_SYSV_ABI sceRtcSetConf(); int PS4_SYSV_ABI sceRtcSetConf();
int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(); int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(); int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(); int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcSetCurrentTick(); int PS4_SYSV_ABI sceRtcSetCurrentTick(OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcSetDosTime(); int PS4_SYSV_ABI sceRtcSetDosTime(OrbisRtcDateTime* pTime, u32 dosTime);
int PS4_SYSV_ABI sceRtcSetTick(); int PS4_SYSV_ABI sceRtcSetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick);
int PS4_SYSV_ABI sceRtcSetTime_t(); int PS4_SYSV_ABI sceRtcSetTime_t(OrbisRtcDateTime* pTime, time_t llTime);
int PS4_SYSV_ABI sceRtcSetWin32FileTime(); int PS4_SYSV_ABI sceRtcSetWin32FileTime(OrbisRtcDateTime* pTime, int64_t ulWin32Time);
int PS4_SYSV_ABI sceRtcTickAddDays(); int PS4_SYSV_ABI sceRtcTickAddDays(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddHours(); int PS4_SYSV_ABI sceRtcTickAddHours(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddMicroseconds(); int PS4_SYSV_ABI sceRtcTickAddMicroseconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2,
int PS4_SYSV_ABI sceRtcTickAddMinutes(); int64_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddMonths(); int PS4_SYSV_ABI sceRtcTickAddMinutes(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddSeconds(); int PS4_SYSV_ABI sceRtcTickAddMonths(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddTicks(); int PS4_SYSV_ABI sceRtcTickAddSeconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddWeeks(); int PS4_SYSV_ABI sceRtcTickAddTicks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddYears(); int PS4_SYSV_ABI sceRtcTickAddWeeks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd);
int PS4_SYSV_ABI sceRtcTickAddYears(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd);
void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym); void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Rtc } // namespace Libraries::Rtc

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
constexpr int ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED = 0x7FFEF9FE;
constexpr int ORBIS_RTC_ERROR_INVALID_PARAMETER = 0x80010602; constexpr int ORBIS_RTC_ERROR_INVALID_PARAMETER = 0x80010602;
constexpr int ORBIS_RTC_ERROR_INVALID_TICK_PARAMETER = 0x80010603; constexpr int ORBIS_RTC_ERROR_INVALID_TICK_PARAMETER = 0x80010603;
constexpr int ORBIS_RTC_ERROR_INVALID_DATE_PARAMETER = 0x80010604; constexpr int ORBIS_RTC_ERROR_INVALID_DATE_PARAMETER = 0x80010604;
@ -14,4 +15,18 @@ constexpr int ORBIS_RTC_ERROR_INVALID_DAYS_PARAMETER = 0x80010623;
constexpr int ORBIS_RTC_ERROR_INVALID_HOURS_PARAMETER = 0x80010624; constexpr int ORBIS_RTC_ERROR_INVALID_HOURS_PARAMETER = 0x80010624;
constexpr int ORBIS_RTC_ERROR_INVALID_MINUTES_PARAMETER = 0x80010625; constexpr int ORBIS_RTC_ERROR_INVALID_MINUTES_PARAMETER = 0x80010625;
constexpr int ORBIS_RTC_ERROR_INVALID_SECONDS_PARAMETER = 0x80010626; constexpr int ORBIS_RTC_ERROR_INVALID_SECONDS_PARAMETER = 0x80010626;
constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627;
constexpr int ORBIS_RTC_ERROR_NOT_INITIALIZED = 0x80B50001;
constexpr int ORBIS_RTC_ERROR_INVALID_POINTER = 0x80B50002;
constexpr int ORBIS_RTC_ERROR_INVALID_VALUE = 0x80B50003;
constexpr int ORBIS_RTC_ERROR_INVALID_ARG = 0x80B50004;
constexpr int ORBIS_RTC_ERROR_NOT_SUPPORTED = 0x80B50005;
constexpr int ORBIS_RTC_ERROR_NO_CLOCK = 0x80B50006;
constexpr int ORBIS_RTC_ERROR_BAD_PARSE = 0x80B50007;
constexpr int ORBIS_RTC_ERROR_INVALID_YEAR = 0x80B50008;
constexpr int ORBIS_RTC_ERROR_INVALID_MONTH = 0x80B50009;
constexpr int ORBIS_RTC_ERROR_INVALID_DAY = 0x80B5000A;
constexpr int ORBIS_RTC_ERROR_INVALID_HOUR = 0x80B5000B;
constexpr int ORBIS_RTC_ERROR_INVALID_MINUTE = 0x80B5000C;
constexpr int ORBIS_RTC_ERROR_INVALID_SECOND = 0x80B5000D;
constexpr int ORBIS_RTC_ERROR_INVALID_MICROSECOND = 0x80B5000E;

View File

@ -179,15 +179,21 @@ int PS4_SYSV_ABI sceSaveDataDeleteUser() {
int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* cond, int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* cond,
OrbisSaveDataDirNameSearchResult* result) { OrbisSaveDataDirNameSearchResult* result) {
if (cond == nullptr) if (cond == nullptr || result == nullptr)
return ORBIS_SAVE_DATA_ERROR_PARAMETER; return ORBIS_SAVE_DATA_ERROR_PARAMETER;
LOG_INFO(Lib_SaveData, "called"); LOG_INFO(Lib_SaveData, "Number of directories = {}", result->dirNamesNum);
const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) /
std::to_string(cond->userId) / game_serial; std::to_string(cond->userId) / game_serial;
if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) { if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) {
if (cond->dirName == nullptr || std::string_view(cond->dirName->data) int maxDirNum = result->dirNamesNum; // Games set a maximum of directories to search for
.empty()) { // look for all dirs if no dir is provided. int i = 0;
for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) {
if (cond->dirName == nullptr || std::string_view(cond->dirName->data).empty()) {
// Look for all dirs if no dir is provided.
for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) {
if (i >= maxDirNum)
break;
if (std::filesystem::is_directory(entry.path()) && if (std::filesystem::is_directory(entry.path()) &&
entry.path().filename().string() != "sdmemory") { entry.path().filename().string() != "sdmemory") {
// sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs. // sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs.
@ -199,13 +205,50 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond*
result->setNum = i; result->setNum = i;
} }
} }
} else { // Need a game to test. } else {
LOG_ERROR(Lib_SaveData, "Check Me. sceSaveDataDirNameSearch: dirName = {}", // Game checks for a specific directory.
cond->dirName->data); LOG_INFO(Lib_SaveData, "dirName = {}", cond->dirName->data);
strncpy(result->dirNames[0].data, cond->dirName->data, 32);
result->hitNum = 1; // Games can pass '%' as a wildcard
result->dirNamesNum = 1; // e.g. `SAVELIST%` searches for all folders with names starting with `SAVELIST`
result->setNum = 1; std::string baseName(cond->dirName->data);
u64 wildcardPos = baseName.find('%');
if (wildcardPos != std::string::npos) {
baseName = baseName.substr(0, wildcardPos);
}
for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) {
if (i >= maxDirNum)
break;
if (std::filesystem::is_directory(entry.path())) {
std::string dirName = entry.path().filename().string();
if (wildcardPos != std::string::npos) {
if (dirName.compare(0, baseName.size(), baseName) != 0) {
continue;
}
} else if (wildcardPos == std::string::npos && dirName != cond->dirName->data) {
continue;
}
strncpy(result->dirNames[i].data, cond->dirName->data, 32);
i++;
result->hitNum = i;
result->dirNamesNum = i;
result->setNum = i;
}
}
}
if (result->params != nullptr) {
Common::FS::IOFile file(mount_dir / cond->dirName->data / "param.txt",
Common::FS::FileAccessMode::Read);
if (file.IsOpen()) {
file.ReadRaw<u8>((void*)result->params, sizeof(OrbisSaveDataParam));
file.Close();
}
} }
} else { } else {
result->hitNum = 0; result->hitNum = 0;

View File

@ -195,7 +195,7 @@ void Emulator::Run(const std::filesystem::path& file) {
} }
void Emulator::LoadSystemModules(const std::filesystem::path& file) { void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 9> ModulesToLoad{ constexpr std::array<SysModules, 10> ModulesToLoad{
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
{"libSceFiber.sprx", nullptr}, {"libSceFiber.sprx", nullptr},
{"libSceUlt.sprx", nullptr}, {"libSceUlt.sprx", nullptr},
@ -204,7 +204,8 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file) {
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
{"libSceJpegEnc.sprx", nullptr}}, {"libSceJpegEnc.sprx", nullptr},
{"libSceFont.sprx", nullptr}},
}; };
std::vector<std::filesystem::path> found_modules; std::vector<std::filesystem::path> found_modules;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -88,6 +88,7 @@ void MainWindow::AddUiWidgets() {
ui->toolBar->addWidget(ui->playButton); ui->toolBar->addWidget(ui->playButton);
ui->toolBar->addWidget(ui->pauseButton); ui->toolBar->addWidget(ui->pauseButton);
ui->toolBar->addWidget(ui->stopButton); ui->toolBar->addWidget(ui->stopButton);
ui->toolBar->addWidget(ui->refreshButton);
ui->toolBar->addWidget(ui->settingsButton); ui->toolBar->addWidget(ui->settingsButton);
ui->toolBar->addWidget(ui->controllerButton); ui->toolBar->addWidget(ui->controllerButton);
QFrame* line = new QFrame(this); QFrame* line = new QFrame(this);
@ -177,6 +178,7 @@ void MainWindow::CreateConnects() {
connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable);
connect(ui->exitAct, &QAction::triggered, this, &QWidget::close); connect(ui->exitAct, &QAction::triggered, this, &QWidget::close);
connect(ui->refreshGameListAct, &QAction::triggered, this, &MainWindow::RefreshGameTable); connect(ui->refreshGameListAct, &QAction::triggered, this, &MainWindow::RefreshGameTable);
connect(ui->refreshButton, &QPushButton::clicked, this, &MainWindow::RefreshGameTable);
connect(ui->showGameListAct, &QAction::triggered, this, &MainWindow::ShowGameList); connect(ui->showGameListAct, &QAction::triggered, this, &MainWindow::ShowGameList);
connect(this, &MainWindow::ExtractionFinished, this, &MainWindow::RefreshGameTable); connect(this, &MainWindow::ExtractionFinished, this, &MainWindow::RefreshGameTable);
@ -852,6 +854,7 @@ void MainWindow::SetUiIcons(bool isWhite) {
ui->playButton->setIcon(RecolorIcon(ui->playButton->icon(), isWhite)); ui->playButton->setIcon(RecolorIcon(ui->playButton->icon(), isWhite));
ui->pauseButton->setIcon(RecolorIcon(ui->pauseButton->icon(), isWhite)); ui->pauseButton->setIcon(RecolorIcon(ui->pauseButton->icon(), isWhite));
ui->stopButton->setIcon(RecolorIcon(ui->stopButton->icon(), isWhite)); ui->stopButton->setIcon(RecolorIcon(ui->stopButton->icon(), isWhite));
ui->refreshButton->setIcon(RecolorIcon(ui->refreshButton->icon(), isWhite));
ui->settingsButton->setIcon(RecolorIcon(ui->settingsButton->icon(), isWhite)); ui->settingsButton->setIcon(RecolorIcon(ui->settingsButton->icon(), isWhite));
ui->controllerButton->setIcon(RecolorIcon(ui->controllerButton->icon(), isWhite)); ui->controllerButton->setIcon(RecolorIcon(ui->controllerButton->icon(), isWhite));
ui->refreshGameListAct->setIcon(RecolorIcon(ui->refreshGameListAct->icon(), isWhite)); ui->refreshGameListAct->setIcon(RecolorIcon(ui->refreshGameListAct->icon(), isWhite));

View File

@ -38,6 +38,7 @@ public:
QPushButton* playButton; QPushButton* playButton;
QPushButton* pauseButton; QPushButton* pauseButton;
QPushButton* stopButton; QPushButton* stopButton;
QPushButton* refreshButton;
QPushButton* settingsButton; QPushButton* settingsButton;
QPushButton* controllerButton; QPushButton* controllerButton;
@ -176,6 +177,10 @@ public:
stopButton->setFlat(true); stopButton->setFlat(true);
stopButton->setIcon(QIcon(":images/stop_icon.png")); stopButton->setIcon(QIcon(":images/stop_icon.png"));
stopButton->setIconSize(QSize(40, 40)); stopButton->setIconSize(QSize(40, 40));
refreshButton = new QPushButton(centralWidget);
refreshButton->setFlat(true);
refreshButton->setIcon(QIcon(":images/refresh_icon.png"));
refreshButton->setIconSize(QSize(32, 32));
settingsButton = new QPushButton(centralWidget); settingsButton = new QPushButton(centralWidget);
settingsButton->setFlat(true); settingsButton->setFlat(true);
settingsButton->setIcon(QIcon(":images/settings_icon.png")); settingsButton->setIcon(QIcon(":images/settings_icon.png"));

View File

@ -194,11 +194,6 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
ax = Input::GetAxis(-0x80, 0x80, axisvalue); ax = Input::GetAxis(-0x80, 0x80, axisvalue);
break; break;
case SDLK_S: case SDLK_S:
if (event->key.mod == SDL_KMOD_LCTRL) {
// Trigger rdoc capture
VideoCore::TriggerCapture();
break;
}
axis = Input::Axis::LeftY; axis = Input::Axis::LeftY;
if (event->type == SDL_EVENT_KEY_DOWN) { if (event->type == SDL_EVENT_KEY_DOWN) {
axisvalue += 127; axisvalue += 127;
@ -287,6 +282,12 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
} }
} }
break; break;
case SDLK_F12:
if (event->type == SDL_EVENT_KEY_DOWN) {
// Trigger rdoc capture
VideoCore::TriggerCapture();
}
break;
default: default:
break; break;
} }

View File

@ -327,6 +327,10 @@ void EmitGetVccHi(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
void EmitGetM0(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetScc(EmitContext& ctx) { void EmitSetScc(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
@ -351,4 +355,8 @@ void EmitSetVccHi(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
void EmitSetM0(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -152,4 +152,20 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange); return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange);
} }
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
auto& buffer = ctx.buffers[binding];
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
}
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
auto& buffer = ctx.buffers[binding];
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value,
ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -133,10 +133,6 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
return ctx.OpLoad(buffer.data_types->Get(1), ptr); return ctx.OpLoad(buffer.data_types->Get(1), ptr);
} }
Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index));
}
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
return ctx.OpLoad( return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
@ -222,12 +218,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
} }
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferF32(ctx, inst, handle, address);
}
template <u32 N> template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle]; auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
@ -246,20 +238,20 @@ static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
} }
} }
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<1>(ctx, handle, address); return EmitLoadBufferU32xN<1>(ctx, handle, address);
} }
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<2>(ctx, handle, address); return EmitLoadBufferU32xN<2>(ctx, handle, address);
} }
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<3>(ctx, handle, address); return EmitLoadBufferU32xN<3>(ctx, handle, address);
} }
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<4>(ctx, handle, address); return EmitLoadBufferU32xN<4>(ctx, handle, address);
} }
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -275,7 +267,7 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
} }
template <u32 N> template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
auto& buffer = ctx.buffers[handle]; auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
@ -287,29 +279,25 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr = const Id ptr =
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i); ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i);
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i)); ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i));
} }
} }
} }
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<2>(ctx, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<4>(ctx, handle, address, value);
}
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<1>(ctx, handle, address, value); EmitStoreBufferU32xN<1>(ctx, handle, address, value);
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<2>(ctx, handle, address, value);
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<4>(ctx, handle, address, value);
} }
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View File

@ -36,12 +36,14 @@ void EmitGetVcc(EmitContext& ctx);
void EmitGetSccLo(EmitContext& ctx); void EmitGetSccLo(EmitContext& ctx);
void EmitGetVccLo(EmitContext& ctx); void EmitGetVccLo(EmitContext& ctx);
void EmitGetVccHi(EmitContext& ctx); void EmitGetVccHi(EmitContext& ctx);
void EmitGetM0(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx); void EmitSetScc(EmitContext& ctx);
void EmitSetExec(EmitContext& ctx); void EmitSetExec(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx); void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx); void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx); void EmitSetVccHi(EmitContext& ctx);
void EmitSetM0(EmitContext& ctx);
void EmitFPCmpClass32(EmitContext& ctx); void EmitFPCmpClass32(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx); void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx);
@ -62,25 +64,16 @@ void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx); void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx); Id EmitReadConst(EmitContext& ctx);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index); Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@ -404,12 +397,13 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitLaneId(EmitContext& ctx); Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value); Id EmitReadFirstLane(EmitContext& ctx, Id value);
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane); Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -43,6 +43,10 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_MIN_U32(inst, false, true); return DS_MIN_U32(inst, false, true);
case Opcode::DS_MAX_RTN_U32: case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, false, true); return DS_MAX_U32(inst, false, true);
case Opcode::DS_APPEND:
return DS_APPEND(inst);
case Opcode::DS_CONSUME:
return DS_CONSUME(inst);
default: default:
LogMissingOpcode(inst); LogMissingOpcode(inst);
} }
@ -192,4 +196,18 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
} }
void Translator::DS_APPEND(const GcnInst& inst) {
const u32 inst_offset = inst.control.ds.offset0;
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
const IR::U32 prev = ir.DataAppend(gds_offset);
SetDst(inst.dst[0], prev);
}
void Translator::DS_CONSUME(const GcnInst& inst) {
const u32 inst_offset = inst.control.ds.offset0;
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
const IR::U32 prev = ir.DataConsume(gds_offset);
SetDst(inst.dst[0], prev);
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -31,6 +31,12 @@ void Translator::EmitExport(const GcnInst& inst) {
case MrtSwizzle::Alt: case MrtSwizzle::Alt:
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3}; static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
return AltSwizzle[comp]; return AltSwizzle[comp];
case MrtSwizzle::Reverse:
static constexpr std::array<u32, 4> RevSwizzle = {3, 2, 1, 0};
return RevSwizzle[comp];
case MrtSwizzle::ReverseAlt:
static constexpr std::array<u32, 4> AltRevSwizzle = {3, 0, 1, 2};
return AltRevSwizzle[comp];
default: default:
UNREACHABLE(); UNREACHABLE();
} }

View File

@ -73,9 +73,13 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
case Opcode::S_SUB_I32: case Opcode::S_SUB_I32:
return S_SUB_U32(inst); return S_SUB_U32(inst);
case Opcode::S_MIN_U32: case Opcode::S_MIN_U32:
return S_MIN_U32(inst); return S_MIN_U32(false, inst);
case Opcode::S_MIN_I32:
return S_MIN_U32(true, inst);
case Opcode::S_MAX_U32: case Opcode::S_MAX_U32:
return S_MAX_U32(inst); return S_MAX_U32(false, inst);
case Opcode::S_MAX_I32:
return S_MAX_U32(true, inst);
case Opcode::S_WQM_B64: case Opcode::S_WQM_B64:
break; break;
default: default:
@ -533,18 +537,18 @@ void Translator::S_ADDC_U32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
} }
void Translator::S_MAX_U32(const GcnInst& inst) { void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMax(src0, src1); const IR::U32 result = ir.IMax(src0, src1, is_signed);
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0)); ir.SetScc(ir.IEqual(result, src0));
} }
void Translator::S_MIN_U32(const GcnInst& inst) { void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMin(src0, src1); const IR::U32 result = ir.IMin(src0, src1, is_signed);
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0)); ir.SetScc(ir.IEqual(result, src0));
} }

View File

@ -153,10 +153,11 @@ T Translator::GetSrc(const InstOperand& operand) {
break; break;
case OperandField::M0: case OperandField::M0:
if constexpr (is_float) { if constexpr (is_float) {
UNREACHABLE(); value = ir.BitCast<IR::F32>(ir.GetM0());
} else { } else {
return m0_value; value = ir.GetM0();
} }
break;
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -296,8 +297,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
case OperandField::VccHi: case OperandField::VccHi:
return ir.SetVccHi(result); return ir.SetVccHi(result);
case OperandField::M0: case OperandField::M0:
m0_value = result; return ir.SetM0(result);
break;
default: default:
UNREACHABLE(); UNREACHABLE();
} }

View File

@ -101,8 +101,8 @@ public:
void S_ADDC_U32(const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst);
void S_MULK_I32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst);
void S_ADDK_I32(const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst);
void S_MAX_U32(const GcnInst& inst); void S_MAX_U32(bool is_signed, const GcnInst& inst);
void S_MIN_U32(const GcnInst& inst); void S_MIN_U32(bool is_signed, const GcnInst& inst);
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
// Scalar Memory // Scalar Memory
@ -173,7 +173,7 @@ public:
void V_BCNT_U32_B32(const GcnInst& inst); void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst);
void V_MAX3_F32(const GcnInst& inst); void V_MAX3_F32(const GcnInst& inst);
void V_MAX3_U32(const GcnInst& inst); void V_MAX3_U32(bool is_signed, const GcnInst& inst);
void V_CVT_I32_F32(const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst);
void V_MIN_I32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst);
@ -192,6 +192,9 @@ public:
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
void V_BFM_B32(const GcnInst& inst); void V_BFM_B32(const GcnInst& inst);
void V_FFBH_U32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst);
void V_MOVRELS_B32(const GcnInst& inst);
void V_MOVRELD_B32(const GcnInst& inst);
void V_MOVRELSD_B32(const GcnInst& inst);
// Vector Memory // Vector Memory
void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst);
@ -214,6 +217,8 @@ public:
void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst);
void DS_APPEND(const GcnInst& inst);
void DS_CONSUME(const GcnInst& inst);
void S_BARRIER(); void S_BARRIER();
// MIMG // MIMG
@ -233,6 +238,9 @@ private:
void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
void LogMissingOpcode(const GcnInst& inst); void LogMissingOpcode(const GcnInst& inst);
private: private:
@ -240,7 +248,6 @@ private:
Info& info; Info& info;
const RuntimeInfo& runtime_info; const RuntimeInfo& runtime_info;
const Profile& profile; const Profile& profile;
IR::U32 m0_value;
bool opcode_missing = false; bool opcode_missing = false;
}; };

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -226,7 +227,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MAX3_F32: case Opcode::V_MAX3_F32:
return V_MAX3_F32(inst); return V_MAX3_F32(inst);
case Opcode::V_MAX3_U32: case Opcode::V_MAX3_U32:
return V_MAX3_U32(inst); return V_MAX3_U32(false, inst);
case Opcode::V_MAX3_I32:
return V_MAX_U32(true, inst);
case Opcode::V_TRUNC_F32: case Opcode::V_TRUNC_F32:
return V_TRUNC_F32(inst); return V_TRUNC_F32(inst);
case Opcode::V_CEIL_F32: case Opcode::V_CEIL_F32:
@ -309,6 +312,12 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_MBCNT_U32_B32(true, inst); return V_MBCNT_U32_B32(true, inst);
case Opcode::V_MBCNT_HI_U32_B32: case Opcode::V_MBCNT_HI_U32_B32:
return V_MBCNT_U32_B32(false, inst); return V_MBCNT_U32_B32(false, inst);
case Opcode::V_MOVRELS_B32:
return V_MOVRELS_B32(inst);
case Opcode::V_MOVRELD_B32:
return V_MOVRELD_B32(inst);
case Opcode::V_MOVRELSD_B32:
return V_MOVRELSD_B32(inst);
case Opcode::V_NOP: case Opcode::V_NOP:
return; return;
@ -824,11 +833,11 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
} }
void Translator::V_MAX3_U32(const GcnInst& inst) { void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])}; const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2))); SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed));
} }
void Translator::V_CVT_I32_F32(const GcnInst& inst) { void Translator::V_CVT_I32_F32(const GcnInst& inst) {
@ -960,14 +969,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
} }
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
if (!is_low) { if (!is_low) {
ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U); // v_mbcnt_hi_u32_b32 v2, -1, 0
return; if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
inst.src[1].field == OperandField::ConstZero) {
return;
}
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0
if (inst.src[0].field == OperandField::ExecHi &&
inst.src[1].field == OperandField::ConstZero) {
return;
}
} else {
// v_mbcnt_lo_u32_b32 v2, -1, vX
// used combined with above to fetch lane id in non-compute stages
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
SetDst(inst.dst[0], ir.LaneId());
}
// v_mbcnt_lo_u32_b32 v20, exec_lo, vX
// used combined in above for append buffer indexing.
if (inst.src[0].field == OperandField::ExecLo) {
SetDst(inst.dst[0], ir.Imm32(0));
}
} }
ASSERT(src0.IsImmediate() && src0.U32() == ~0U);
SetDst(inst.dst[0], ir.LaneId());
} }
void Translator::V_BFM_B32(const GcnInst& inst) { void Translator::V_BFM_B32(const GcnInst& inst) {
@ -990,4 +1014,52 @@ void Translator::V_FFBH_U32(const GcnInst& inst) {
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
} }
// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) {
// Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs
IR::U32 src_val = ir.GetVectorReg<IR::U32>(IR::VectorReg::V0);
for (u32 i = src_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno));
src_val =
IR::U32{ir.Select(cond, ir.GetVectorReg<IR::U32>(IR::VectorReg::V0 + i), src_val)};
}
return src_val;
}
void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) {
for (u32 i = dst_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno));
const IR::U32 dst_val =
IR::U32{ir.Select(cond, src_val, ir.GetVectorReg<IR::U32>(IR::VectorReg::V0 + i))};
ir.SetVectorReg(IR::VectorReg::V0 + i, dst_val);
}
}
void Translator::V_MOVRELS_B32(const GcnInst& inst) {
u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
const IR::U32 m0 = ir.GetM0();
const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
SetDst(inst.dst[0], src_val);
}
void Translator::V_MOVRELD_B32(const GcnInst& inst) {
const IR::U32 src_val{GetSrc(inst.src[0])};
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
IR::U32 m0 = ir.GetM0();
VMovRelDHelper(dst_vgprno, src_val, m0);
}
void Translator::V_MOVRELSD_B32(const GcnInst& inst) {
u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
IR::U32 m0 = ir.GetM0();
const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
VMovRelDHelper(dst_vgprno, src_val, m0);
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -18,9 +18,11 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::IMAGE_SAMPLE_B: case Opcode::IMAGE_SAMPLE_B:
case Opcode::IMAGE_SAMPLE_C_LZ_O: case Opcode::IMAGE_SAMPLE_C_LZ_O:
case Opcode::IMAGE_SAMPLE_D: case Opcode::IMAGE_SAMPLE_D:
case Opcode::IMAGE_SAMPLE_CD:
return IMAGE_SAMPLE(inst); return IMAGE_SAMPLE(inst);
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_LZ: case Opcode::IMAGE_GATHER4_LZ:
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_C_LZ:
case Opcode::IMAGE_GATHER4_LZ_O: case Opcode::IMAGE_GATHER4_LZ_O:
return IMAGE_GATHER(inst); return IMAGE_GATHER(inst);
case Opcode::IMAGE_ATOMIC_ADD: case Opcode::IMAGE_ATOMIC_ADD:
@ -98,6 +100,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_STORE(2, true, inst); return BUFFER_STORE(2, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XYZ: case Opcode::TBUFFER_STORE_FORMAT_XYZ:
return BUFFER_STORE(3, true, inst); return BUFFER_STORE(3, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XYZW:
return BUFFER_STORE(4, true, inst);
case Opcode::BUFFER_STORE_DWORD: case Opcode::BUFFER_STORE_DWORD:
return BUFFER_STORE(1, false, inst); return BUFFER_STORE(1, false, inst);
@ -143,10 +147,6 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
void Translator::IMAGE_SAMPLE(const GcnInst& inst) { void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;
if (mimg.da) {
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
}
IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code}; IR::VectorReg dest_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
@ -384,11 +384,11 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code}; const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) { if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value}); ir.SetVectorReg(dst_reg, IR::U32{value});
return; return;
} }
for (u32 i = 0; i < num_dwords; i++) { for (u32 i = 0; i < num_dwords; i++) {
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
} }
} }
@ -452,21 +452,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::VectorReg src_reg{inst.src[1].code}; const IR::VectorReg src_reg{inst.src[1].code};
switch (num_dwords) { switch (num_dwords) {
case 1: case 1:
value = ir.GetVectorReg<IR::F32>(src_reg); value = ir.GetVectorReg(src_reg);
break; break;
case 2: case 2:
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg), value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
ir.GetVectorReg<IR::F32>(src_reg + 1));
break; break;
case 3: case 3:
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg), value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg<IR::F32>(src_reg + 1), ir.GetVectorReg(src_reg + 2));
ir.GetVectorReg<IR::F32>(src_reg + 2));
break; break;
case 4: case 4:
value = ir.CompositeConstruct( value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg<IR::F32>(src_reg), ir.GetVectorReg<IR::F32>(src_reg + 1), ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
ir.GetVectorReg<IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 3));
break; break;
} }
const IR::Value handle = const IR::Value handle =
@ -514,6 +511,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};
const IR::VectorReg vdata{inst.src[1].code}; const IR::VectorReg vdata{inst.src[1].code};
const IR::ScalarReg srsrc{inst.src[2].code * 4}; const IR::ScalarReg srsrc{inst.src[2].code * 4};
const IR::Value address = [&] -> IR::Value {
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
return {};
}();
const IR::U32 soffset{GetSrc(inst.src[3])}; const IR::U32 soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
@ -523,7 +529,6 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
info.offset_enable.Assign(mubuf.offen); info.offset_enable.Assign(mubuf.offen);
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata); IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
const IR::U32 address = ir.GetVectorReg(vaddr);
const IR::Value handle = const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));

View File

@ -37,12 +37,13 @@ struct BufferResource {
u32 dword_offset; u32 dword_offset;
IR::Type used_types; IR::Type used_types;
AmdGpu::Buffer inline_cbuf; AmdGpu::Buffer inline_cbuf;
bool is_gds_buffer{};
bool is_instance_data{}; bool is_instance_data{};
bool is_written{}; bool is_written{};
bool IsStorage(AmdGpu::Buffer buffer) const noexcept { bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
static constexpr size_t MaxUboSize = 65536; static constexpr size_t MaxUboSize = 65536;
return buffer.GetSize() > MaxUboSize || is_written; return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer;
} }
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;

View File

@ -147,6 +147,7 @@ public:
/// Intrusively store the value of a register in the block. /// Intrusively store the value of a register in the block.
std::array<Value, NumScalarRegs> ssa_sreg_values; std::array<Value, NumScalarRegs> ssa_sreg_values;
std::array<Value, NumScalarRegs> ssa_sbit_values;
std::array<Value, NumVectorRegs> ssa_vreg_values; std::array<Value, NumVectorRegs> ssa_vreg_values;
bool has_multiple_predecessors{false}; bool has_multiple_predecessors{false};

View File

@ -217,6 +217,10 @@ U32 IREmitter::GetVccHi() {
return Inst<U32>(Opcode::GetVccHi); return Inst<U32>(Opcode::GetVccHi);
} }
U32 IREmitter::GetM0() {
return Inst<U32>(Opcode::GetM0);
}
void IREmitter::SetScc(const U1& value) { void IREmitter::SetScc(const U1& value) {
Inst(Opcode::SetScc, value); Inst(Opcode::SetScc, value);
} }
@ -241,6 +245,10 @@ void IREmitter::SetVccHi(const U32& value) {
Inst(Opcode::SetVccHi, value); Inst(Opcode::SetVccHi, value);
} }
void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp)); return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
} }
@ -305,21 +313,21 @@ U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
return Inst<U32>(Opcode::ReadConst, base, offset); return Inst<U32>(Opcode::ReadConst, base, offset);
} }
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
return Inst<F32>(Opcode::ReadConstBuffer, handle, index); return Inst<U32>(Opcode::ReadConstBuffer, handle, index);
} }
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address, Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
BufferInstInfo info) { BufferInstInfo info) {
switch (num_dwords) { switch (num_dwords) {
case 1: case 1:
return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address); return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address);
case 2: case 2:
return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address); return Inst(Opcode::LoadBufferU32x2, Flags{info}, handle, address);
case 3: case 3:
return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address); return Inst(Opcode::LoadBufferU32x3, Flags{info}, handle, address);
case 4: case 4:
return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address); return Inst(Opcode::LoadBufferU32x4, Flags{info}, handle, address);
default: default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
} }
@ -333,17 +341,16 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
const Value& data, BufferInstInfo info) { const Value& data, BufferInstInfo info) {
switch (num_dwords) { switch (num_dwords) {
case 1: case 1:
Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32, Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data);
Flags{info}, handle, address, data);
break; break;
case 2: case 2:
Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data); Inst(Opcode::StoreBufferU32x2, Flags{info}, handle, address, data);
break; break;
case 3: case 3:
Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data); Inst(Opcode::StoreBufferU32x3, Flags{info}, handle, address, data);
break; break;
case 4: case 4:
Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data); Inst(Opcode::StoreBufferU32x4, Flags{info}, handle, address, data);
break; break;
default: default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
@ -402,6 +409,14 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con
Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
} }
U32 IREmitter::DataAppend(const U32& counter) {
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
}
U32 IREmitter::DataConsume(const U32& counter) {
return Inst<U32>(Opcode::DataConsume, counter, Imm32(0));
}
U32 IREmitter::LaneId() { U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId); return Inst<U32>(Opcode::LaneId);
} }

View File

@ -67,12 +67,14 @@ public:
[[nodiscard]] U1 GetVcc(); [[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi(); [[nodiscard]] U32 GetVccHi();
[[nodiscard]] U32 GetM0();
void SetScc(const U1& value); void SetScc(const U1& value);
void SetExec(const U1& value); void SetExec(const U1& value);
void SetVcc(const U1& value); void SetVcc(const U1& value);
void SetSccLo(const U32& value); void SetSccLo(const U32& value);
void SetVccLo(const U32& value); void SetVccLo(const U32& value);
void SetVccHi(const U32& value); void SetVccHi(const U32& value);
void SetM0(const U32& value);
[[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 Condition(IR::Condition cond);
@ -88,7 +90,7 @@ public:
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
BufferInstInfo info); BufferInstInfo info);
@ -118,6 +120,8 @@ public:
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] U32 DataAppend(const U32& counter);
[[nodiscard]] U32 DataConsume(const U32& counter);
[[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId(); [[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);

View File

@ -51,12 +51,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::Discard: case Opcode::Discard:
case Opcode::DiscardCond: case Opcode::DiscardCond:
case Opcode::SetAttribute: case Opcode::SetAttribute:
case Opcode::StoreBufferF32:
case Opcode::StoreBufferF32x2:
case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::StoreBufferU32: case Opcode::StoreBufferU32:
case Opcode::StoreBufferU32x2:
case Opcode::StoreBufferU32x3:
case Opcode::StoreBufferU32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::BufferAtomicIAdd32: case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicUMin32: case Opcode::BufferAtomicUMin32:
@ -68,6 +67,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::BufferAtomicOr32: case Opcode::BufferAtomicOr32:
case Opcode::BufferAtomicXor32: case Opcode::BufferAtomicXor32:
case Opcode::BufferAtomicSwap32: case Opcode::BufferAtomicSwap32:
case Opcode::DataAppend:
case Opcode::DataConsume:
case Opcode::WriteSharedU128: case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64: case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32: case Opcode::WriteSharedU32:

View File

@ -17,8 +17,7 @@ OPCODE(DiscardCond, Void, U1,
// Constant memory operations // Constant memory operations
OPCODE(ReadConst, U32, U32x2, U32, ) OPCODE(ReadConst, U32, U32x2, U32, )
OPCODE(ReadConstBuffer, F32, Opaque, U32, ) OPCODE(ReadConstBuffer, U32, Opaque, U32, )
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
// Barriers // Barriers
OPCODE(Barrier, Void, ) OPCODE(Barrier, Void, )
@ -60,12 +59,14 @@ OPCODE(GetExec, U1, Void,
OPCODE(GetVcc, U1, Void, ) OPCODE(GetVcc, U1, Void, )
OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, ) OPCODE(GetVccHi, U32, Void, )
OPCODE(GetM0, U32, Void, )
OPCODE(SetScc, Void, U1, ) OPCODE(SetScc, Void, U1, )
OPCODE(SetExec, Void, U1, ) OPCODE(SetExec, Void, U1, )
OPCODE(SetVcc, Void, U1, ) OPCODE(SetVcc, Void, U1, )
OPCODE(SetSccLo, Void, U32, ) OPCODE(SetSccLo, Void, U32, )
OPCODE(SetVccLo, Void, U32, ) OPCODE(SetVccLo, Void, U32, )
OPCODE(SetVccHi, Void, U32, ) OPCODE(SetVccHi, Void, U32, )
OPCODE(SetM0, Void, U32, )
// Undefined // Undefined
OPCODE(UndefU1, U1, ) OPCODE(UndefU1, U1, )
@ -75,21 +76,19 @@ OPCODE(UndefU32, U32,
OPCODE(UndefU64, U64, ) OPCODE(UndefU64, U64, )
// Buffer operations // Buffer operations
OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferU32, U32, Opaque, Opaque, ) OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, ) OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, )
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, )
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, )
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, )
// Buffer atomic operations // Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@ -99,7 +98,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
// Vector utility // Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
@ -343,3 +342,5 @@ OPCODE(QuadShuffle, U32, U32,
OPCODE(ReadFirstLane, U32, U32, ) OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 ) OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 ) OPCODE(WriteLane, U32, U32, U32, U32 )
OPCODE(DataAppend, U32, U32, U32 )
OPCODE(DataConsume, U32, U32, U32 )

View File

@ -3,7 +3,6 @@
#include <algorithm> #include <algorithm>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/breadth_first_search.h"
@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
bool IsBufferStore(const IR::Inst& inst) { bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
return true; return true;
default: default:
return IsBufferAtomic(inst); return IsBufferAtomic(inst);
@ -55,25 +53,28 @@ bool IsBufferStore(const IR::Inst& inst) {
bool IsBufferInstruction(const IR::Inst& inst) { bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::LoadBufferU32: case IR::Opcode::LoadBufferU32:
case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferU32:
return true; return true;
default: default:
return IsBufferStore(inst); return IsBufferStore(inst);
} }
} }
bool IsDataRingInstruction(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::DataAppend ||
inst.GetOpcode() == IR::Opcode::DataConsume;
}
bool IsTextureBufferInstruction(const IR::Inst& inst) { bool IsTextureBufferInstruction(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 || return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32; inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
} }
static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
switch (num_format) { switch (num_format) {
case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Float:
switch (data_format) { switch (data_format) {
@ -98,19 +99,15 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
return IR::Type::F32;
case IR::Opcode::LoadBufferU32: case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBufferU32: case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::BufferAtomicIAdd32: case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSwap32: case IR::Opcode::BufferAtomicSwap32:
return IR::Type::U32; return IR::Type::U32;
@ -191,6 +188,10 @@ public:
u32 Add(const BufferResource& desc) { u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
// Only one GDS binding can exist.
if (desc.is_gds_buffer && existing.is_gds_buffer) {
return true;
}
return desc.sgpr_base == existing.sgpr_base && return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset && desc.dword_offset == existing.dword_offset &&
desc.inline_cbuf == existing.inline_cbuf; desc.inline_cbuf == existing.inline_cbuf;
@ -399,8 +400,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
// Address of constant buffer reads can be calculated at IR emittion time. // Address of constant buffer reads can be calculated at IR emittion time.
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
return; return;
} }
@ -609,6 +609,51 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
} }
} }
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
// Insert gds binding in the shader if it doesn't exist already.
// The buffer is used for append/consume counters.
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = GdsSharp,
.is_gds_buffer = true,
.is_written = true,
});
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
// Attempt to deduce the GDS address of counter at compile time.
const u32 gds_addr = [&] {
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
return gds_offset.U32() & 0xFFFF;
}
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
return m0_val & 0xFFFF;
}();
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
}
void ResourceTrackingPass(IR::Program& program) { void ResourceTrackingPass(IR::Program& program) {
// Iterate resource instructions and patch them after finding the sharp. // Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info; auto& info = program.info;
@ -625,6 +670,10 @@ void ResourceTrackingPass(IR::Program& program) {
} }
if (IsImageInstruction(inst)) { if (IsImageInstruction(inst)) {
PatchImageInstruction(*block, inst, info, descriptors); PatchImageInstruction(*block, inst, info, descriptors);
continue;
}
if (IsDataRingInstruction(inst)) {
PatchDataRingInstruction(*block, inst, info, descriptors);
} }
} }
} }

View File

@ -33,6 +33,7 @@ struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {}; struct VccLoTag : FlagTag {};
struct VccHiTag : FlagTag {}; struct VccHiTag : FlagTag {};
struct M0Tag : FlagTag {};
struct GotoVariable : FlagTag { struct GotoVariable : FlagTag {
GotoVariable() = default; GotoVariable() = default;
@ -43,8 +44,17 @@ struct GotoVariable : FlagTag {
u32 index; u32 index;
}; };
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag, struct ThreadBitScalar : FlagTag {
VccFlagTag, VccLoTag, VccHiTag>; ThreadBitScalar() = default;
explicit ThreadBitScalar(IR::ScalarReg sgpr_) : sgpr{sgpr_} {}
auto operator<=>(const ThreadBitScalar&) const noexcept = default;
IR::ScalarReg sgpr;
};
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, ThreadBitScalar,
SccFlagTag, ExecFlagTag, VccFlagTag, VccLoTag, VccHiTag, M0Tag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>; using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable { struct DefTable {
@ -69,6 +79,13 @@ struct DefTable {
goto_vars[variable.index].insert_or_assign(block, value); goto_vars[variable.index].insert_or_assign(block, value);
} }
const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) {
return block->ssa_sreg_values[RegIndex(variable.sgpr)];
}
void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) {
block->ssa_sreg_values[RegIndex(variable.sgpr)] = value;
}
const IR::Value& Def(IR::Block* block, SccFlagTag) { const IR::Value& Def(IR::Block* block, SccFlagTag) {
return scc_flag[block]; return scc_flag[block];
} }
@ -103,6 +120,12 @@ struct DefTable {
void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) { void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
vcc_flag.insert_or_assign(block, value); vcc_flag.insert_or_assign(block, value);
} }
const IR::Value& Def(IR::Block* block, M0Tag) {
return m0_flag[block];
}
void SetDef(IR::Block* block, M0Tag, const IR::Value& value) {
m0_flag.insert_or_assign(block, value);
}
std::unordered_map<u32, ValueMap> goto_vars; std::unordered_map<u32, ValueMap> goto_vars;
ValueMap scc_flag; ValueMap scc_flag;
@ -111,6 +134,7 @@ struct DefTable {
ValueMap scc_lo_flag; ValueMap scc_lo_flag;
ValueMap vcc_lo_flag; ValueMap vcc_lo_flag;
ValueMap vcc_hi_flag; ValueMap vcc_hi_flag;
ValueMap m0_flag;
}; };
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept { IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
@ -129,6 +153,10 @@ IR::Opcode UndefOpcode(const VccHiTag) noexcept {
return IR::Opcode::UndefU32; return IR::Opcode::UndefU32;
} }
IR::Opcode UndefOpcode(const M0Tag) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const FlagTag) noexcept { IR::Opcode UndefOpcode(const FlagTag) noexcept {
return IR::Opcode::UndefU1; return IR::Opcode::UndefU1;
} }
@ -161,7 +189,7 @@ public:
} }
template <typename Type> template <typename Type>
IR::Value ReadVariable(Type variable, IR::Block* root_block, bool is_thread_bit = false) { IR::Value ReadVariable(Type variable, IR::Block* root_block) {
boost::container::small_vector<ReadState<Type>, 64> stack{ boost::container::small_vector<ReadState<Type>, 64> stack{
ReadState<Type>(nullptr), ReadState<Type>(nullptr),
ReadState<Type>(root_block), ReadState<Type>(root_block),
@ -189,7 +217,7 @@ public:
} else if (!block->IsSsaSealed()) { } else if (!block->IsSsaSealed()) {
// Incomplete CFG // Incomplete CFG
IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
incomplete_phis[block].insert_or_assign(variable, phi); incomplete_phis[block].insert_or_assign(variable, phi);
stack.back().result = IR::Value{&*phi}; stack.back().result = IR::Value{&*phi};
@ -202,7 +230,7 @@ public:
} else { } else {
// Break potential cycles with operandless phi // Break potential cycles with operandless phi
IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
WriteVariable(variable, block, IR::Value{phi}); WriteVariable(variable, block, IR::Value{phi});
@ -251,9 +279,7 @@ private:
template <typename Type> template <typename Type>
IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
for (IR::Block* const imm_pred : block->ImmPredecessors()) { for (IR::Block* const imm_pred : block->ImmPredecessors()) {
const bool is_thread_bit = phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
std::is_same_v<Type, IR::ScalarReg> && phi.Flags<IR::Type>() == IR::Type::U1;
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred, is_thread_bit));
} }
return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
} }
@ -301,7 +327,11 @@ private:
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
const IR::Opcode opcode{inst.GetOpcode()}; const IR::Opcode opcode{inst.GetOpcode()};
switch (opcode) { switch (opcode) {
case IR::Opcode::SetThreadBitScalarReg: case IR::Opcode::SetThreadBitScalarReg: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
pass.WriteVariable(ThreadBitScalar{reg}, block, inst.Arg(1));
break;
}
case IR::Opcode::SetScalarRegister: { case IR::Opcode::SetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
pass.WriteVariable(reg, block, inst.Arg(1)); pass.WriteVariable(reg, block, inst.Arg(1));
@ -330,11 +360,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVccHi: case IR::Opcode::SetVccHi:
pass.WriteVariable(VccHiTag{}, block, inst.Arg(0)); pass.WriteVariable(VccHiTag{}, block, inst.Arg(0));
break; break;
case IR::Opcode::GetThreadBitScalarReg: case IR::Opcode::SetM0:
pass.WriteVariable(M0Tag{}, block, inst.Arg(0));
break;
case IR::Opcode::GetThreadBitScalarReg: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
const IR::Value value = pass.ReadVariable(ThreadBitScalar{reg}, block);
inst.ReplaceUsesWith(value);
break;
}
case IR::Opcode::GetScalarRegister: { case IR::Opcode::GetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg; const IR::Value value = pass.ReadVariable(reg, block);
const IR::Value value = pass.ReadVariable(reg, block, thread_bit);
inst.ReplaceUsesWith(value); inst.ReplaceUsesWith(value);
break; break;
} }
@ -362,6 +399,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVccHi: case IR::Opcode::GetVccHi:
inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block)); inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block));
break; break;
case IR::Opcode::GetM0:
inst.ReplaceUsesWith(pass.ReadVariable(M0Tag{}, block));
break;
default: default:
break; break;
} }

View File

@ -107,6 +107,7 @@ struct RuntimeInfo {
Stage stage; Stage stage;
u32 num_user_data; u32 num_user_data;
u32 num_input_vgprs; u32 num_input_vgprs;
u32 num_allocated_vgprs;
VertexRuntimeInfo vs_info; VertexRuntimeInfo vs_info;
FragmentRuntimeInfo fs_info; FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info; ComputeRuntimeInfo cs_info;

View File

@ -465,6 +465,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::EventWriteEos: { case PM4ItOpcode::EventWriteEos: {
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header); const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
event_eos->SignalFence(); event_eos->SignalFence();
if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) {
ASSERT(event_eos->size == 1);
if (rasterizer) {
rasterizer->Finish();
const u32 value = rasterizer->ReadDataFromGds(event_eos->gds_index);
*event_eos->Address() = value;
}
}
break; break;
} }
case PM4ItOpcode::EventWriteEop: { case PM4ItOpcode::EventWriteEop: {
@ -474,6 +482,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
case PM4ItOpcode::DmaData: { case PM4ItOpcode::DmaData: {
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header); const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
rasterizer->InlineDataToGds(dma_data->dst_addr_lo, dma_data->data);
}
break; break;
} }
case PM4ItOpcode::WriteData: { case PM4ItOpcode::WriteData: {

View File

@ -350,6 +350,17 @@ struct PM4CmdEventWriteEop {
} }
}; };
enum class DmaDataDst : u32 {
Memory = 0,
Gds = 1,
};
enum class DmaDataSrc : u32 {
Memory = 0,
Gds = 1,
Data = 2,
};
struct PM4DmaData { struct PM4DmaData {
PM4Type3Header header; PM4Type3Header header;
union { union {
@ -357,11 +368,11 @@ struct PM4DmaData {
BitField<12, 1, u32> src_atc; BitField<12, 1, u32> src_atc;
BitField<13, 2, u32> src_cache_policy; BitField<13, 2, u32> src_cache_policy;
BitField<15, 1, u32> src_volatile; BitField<15, 1, u32> src_volatile;
BitField<20, 2, u32> dst_sel; BitField<20, 2, DmaDataDst> dst_sel;
BitField<24, 1, u32> dst_atc; BitField<24, 1, u32> dst_atc;
BitField<25, 2, u32> dst_cache_policy; BitField<25, 2, u32> dst_cache_policy;
BitField<27, 1, u32> dst_volatile; BitField<27, 1, u32> dst_volatile;
BitField<29, 2, u32> src_sel; BitField<29, 2, DmaDataSrc> src_sel;
BitField<31, 1, u32> cp_sync; BitField<31, 1, u32> cp_sync;
}; };
union { union {
@ -502,13 +513,17 @@ struct PM4CmdEventWriteEos {
} }
void SignalFence() const { void SignalFence() const {
switch (command.Value()) { const auto cmd = command.Value();
switch (cmd) {
case Command::SingalFence: { case Command::SingalFence: {
*Address() = DataDWord(); *Address() = DataDWord();
break; break;
} }
case Command::GdsStore: {
break;
}
default: { default: {
UNREACHABLE(); UNREACHABLE_MSG("Unknown command {}", u32(cmd));
} }
} }
} }

View File

@ -15,8 +15,9 @@
namespace VideoCore { namespace VideoCore {
static constexpr size_t NumVertexBuffers = 32; static constexpr size_t NumVertexBuffers = 32;
static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t GdsBufferSize = 64_KB;
static constexpr size_t UboStreamBufferSize = 64_MB; static constexpr size_t StagingBufferSize = 1_GB;
static constexpr size_t UboStreamBufferSize = 128_MB;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
@ -25,7 +26,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
texture_cache{texture_cache_}, tracker{tracker_}, texture_cache{texture_cache_}, tracker{tracker_},
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize},
memory_tracker{&tracker} { memory_tracker{&tracker} {
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
// Ensure the first slot is used for the null buffer // Ensure the first slot is used for the null buffer
void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1)); void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
} }
@ -232,6 +236,27 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
return regs.num_indices; return regs.num_indices;
} }
void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) {
ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned");
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
const vk::BufferMemoryBarrier2 buf_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = gds_buffer.Handle(),
.offset = gds_offset,
.size = sizeof(u32),
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier,
});
cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value);
}
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
bool is_texel_buffer) { bool is_texel_buffer) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE; static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
@ -258,6 +283,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size)
if (buffer_id) { if (buffer_id) {
Buffer& buffer = slot_buffers[buffer_id]; Buffer& buffer = slot_buffers[buffer_id];
if (buffer.IsInBounds(gpu_addr, size)) { if (buffer.IsInBounds(gpu_addr, size)) {
SynchronizeBuffer(buffer, gpu_addr, size, false);
return {&buffer, buffer.Offset(gpu_addr)}; return {&buffer, buffer.Offset(gpu_addr)};
} }
} }
@ -541,64 +567,48 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
} }
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
boost::container::small_vector<ImageId, 8> image_ids; static constexpr FindFlags find_flags =
const u32 inv_size = std::min(size, MaxInvalidateDist); FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
texture_cache.ForEachImageInRegion(device_addr, inv_size, [&](ImageId image_id, Image& image) { ImageInfo info{};
// Only consider GPU modified images, i.e render targets or storage images. info.guest_address = device_addr;
// Also avoid any CPU modified images as the image data is likely to be stale. info.guest_size_bytes = size;
if (True(image.flags & ImageFlagBits::CpuModified) || const ImageId image_id = texture_cache.FindImage(info, find_flags);
False(image.flags & ImageFlagBits::GpuModified)) { if (!image_id) {
return;
}
// Image must fully overlap with the provided buffer range.
if (image.cpu_addr < device_addr || image.cpu_addr_end > device_addr + size) {
return;
}
image_ids.push_back(image_id);
});
if (image_ids.empty()) {
return false; return false;
} }
// Sort images by modification tick. If there are overlaps we want to Image& image = texture_cache.GetImage(image_id);
// copy from least to most recently modified. if (image.info.guest_size_bytes > size) {
std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) { return false;
const Image& lhs = texture_cache.GetImage(lhs_id);
const Image& rhs = texture_cache.GetImage(rhs_id);
return lhs.tick_accessed_last < rhs.tick_accessed_last;
});
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
for (const ImageId image_id : image_ids) {
copies.clear();
Image& image = texture_cache.GetImage(image_id);
u32 offset = buffer.Offset(image.cpu_addr);
const u32 num_layers = image.info.resources.layers;
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
copies.push_back({
.bufferOffset = offset,
.bufferRowLength = static_cast<u32>(mip_pitch),
.bufferImageHeight = static_cast<u32>(mip_height),
.imageSubresource{
.aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
.mipLevel = m,
.baseArrayLayer = 0,
.layerCount = num_layers,
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, depth},
});
offset += mip_ofs * num_layers;
}
scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
copies);
} }
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
u32 offset = buffer.Offset(image.cpu_addr);
const u32 num_layers = image.info.resources.layers;
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
copies.push_back({
.bufferOffset = offset,
.bufferRowLength = static_cast<u32>(mip_pitch),
.bufferImageHeight = static_cast<u32>(mip_height),
.imageSubresource{
.aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
.mipLevel = m,
.baseArrayLayer = 0,
.layerCount = num_layers,
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, depth},
});
offset += mip_ofs * num_layers;
}
scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
copies);
return true; return true;
} }

View File

@ -57,6 +57,11 @@ public:
PageManager& tracker); PageManager& tracker);
~BufferCache(); ~BufferCache();
/// Returns a pointer to GDS device local buffer.
[[nodiscard]] const Buffer* GetGdsBuffer() const noexcept {
return &gds_buffer;
}
/// Invalidates any buffer in the logical page range. /// Invalidates any buffer in the logical page range.
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
@ -66,6 +71,9 @@ public:
/// Bind host index buffer for the current draw. /// Bind host index buffer for the current draw.
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
/// Writes a value to GDS buffer.
void InlineDataToGds(u32 gds_offset, u32 value);
/// Obtains a buffer for the specified region. /// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
bool is_texel_buffer = false); bool is_texel_buffer = false);
@ -130,6 +138,7 @@ private:
PageManager& tracker; PageManager& tracker;
StreamBuffer staging_buffer; StreamBuffer staging_buffer;
StreamBuffer stream_buffer; StreamBuffer stream_buffer;
Buffer gds_buffer;
std::mutex mutex; std::mutex mutex;
Common::SlotVector<Buffer> slot_buffers; Common::SlotVector<Buffer> slot_buffers;
MemoryTracker memory_tracker; MemoryTracker memory_tracker;

View File

@ -585,11 +585,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
vk::Format AdjustColorBufferFormat(vk::Format base_format, vk::Format AdjustColorBufferFormat(vk::Format base_format,
Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) { Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) {
ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard ||
comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate,
"Unsupported component swap mode {}", static_cast<u32>(comp_swap));
const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse;
const bool comp_swap_alt_reverse =
comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse;
if (comp_swap_alt) { if (comp_swap_alt) {
switch (base_format) { switch (base_format) {
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
@ -605,6 +604,18 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
default: default:
break; break;
} }
} else if (comp_swap_reverse) {
switch (base_format) {
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eA8B8G8R8UnormPack32;
case vk::Format::eR8G8B8A8Srgb:
return is_vo_surface ? vk::Format::eA8B8G8R8UnormPack32
: vk::Format::eA8B8G8R8SrgbPack32;
default:
break;
}
} else if (comp_swap_alt_reverse) {
return base_format;
} else { } else {
if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) { if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) {
return vk::Format::eR8G8B8A8Unorm; return vk::Format::eR8G8B8A8Unorm;
@ -649,8 +660,8 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
*out_data++ = i; *out_data++ = i;
*out_data++ = i + 1; *out_data++ = i + 1;
*out_data++ = i + 2; *out_data++ = i + 2;
*out_data++ = i + 2;
*out_data++ = i; *out_data++ = i;
*out_data++ = i + 2;
*out_data++ = i + 3; *out_data++ = i + 3;
} }
} }

View File

@ -109,37 +109,42 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
u32 binding{}; u32 binding{};
for (const auto& desc : info->buffers) { for (const auto& desc : info->buffers) {
const auto vsharp = desc.GetSharp(*info); bool is_storage = true;
const bool is_storage = desc.IsStorage(vsharp); if (desc.is_gds_buffer) {
const VAddr address = vsharp.base_address; auto* vk_buffer = buffer_cache.GetGdsBuffer();
// Most of the time when a metadata is updated with a shader it gets cleared. It means we buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes());
// can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
// need its full emulation anyways. For cases of metadata read a warning will be logged.
if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return false;
}
} else { } else {
if (texture_cache.IsMeta(address)) { const auto vsharp = desc.GetSharp(*info);
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); is_storage = desc.IsStorage(vsharp);
const VAddr address = vsharp.base_address;
// Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be
// logged.
if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return false;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
} }
const u32 size = vsharp.GetSize();
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, desc.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
} }
const u32 size = vsharp.GetSize();
if (desc.is_written) {
texture_cache.InvalidateMemory(address, size);
}
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
set_writes.push_back({ set_writes.push_back({
.dstSet = VK_NULL_HANDLE, .dstSet = VK_NULL_HANDLE,
.dstBinding = binding++, .dstBinding = binding++,
@ -188,7 +193,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
buffer_barriers.emplace_back(*barrier); buffer_barriers.emplace_back(*barrier);
} }
if (desc.is_written) { if (desc.is_written) {
texture_cache.InvalidateMemory(address, size); texture_cache.MarkWritten(address, size);
} }
} }
set_writes.push_back({ set_writes.push_back({

View File

@ -432,7 +432,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
buffer_barriers.emplace_back(*barrier); buffer_barriers.emplace_back(*barrier);
} }
if (desc.is_written) { if (desc.is_written) {
texture_cache.InvalidateMemory(address, size); texture_cache.MarkWritten(address, size);
} }
} }
set_writes.push_back({ set_writes.push_back({

View File

@ -76,6 +76,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
case Shader::Stage::Vertex: { case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs; info.num_user_data = regs.vs_program.settings.num_user_regs;
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4;
GatherVertexOutputs(info.vs_info, regs.vs_output_control); GatherVertexOutputs(info.vs_info, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one = info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() && !instance.IsDepthClipControlSupported() &&
@ -84,6 +85,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
} }
case Shader::Stage::Fragment: { case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs; info.num_user_data = regs.ps_program.settings.num_user_regs;
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
[](Liverpool::ColorBuffer::SwapMode mode) { [](Liverpool::ColorBuffer::SwapMode mode) {
return static_cast<Shader::MrtSwizzle>(mode); return static_cast<Shader::MrtSwizzle>(mode);
@ -102,6 +104,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
case Shader::Stage::Compute: { case Shader::Stage::Compute: {
const auto& cs_pgm = regs.cs_program; const auto& cs_pgm = regs.cs_program;
info.num_user_data = cs_pgm.settings.num_user_regs; info.num_user_data = cs_pgm.settings.num_user_regs;
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full}; cs_pgm.num_thread_z.full};
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
@ -295,6 +298,16 @@ bool PipelineCache::RefreshGraphicsKey() {
return false; return false;
} }
static bool TessMissingLogged = false;
if (auto* pgm = regs.ProgramForStage(3);
regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) {
if (!TessMissingLogged) {
LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped");
TessMissingLogged = true;
}
return false;
}
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
} }
return true; return true;

View File

@ -175,6 +175,10 @@ u64 Rasterizer::Flush() {
return current_tick; return current_tick;
} }
void Rasterizer::Finish() {
scheduler.Finish();
}
void Rasterizer::BeginRendering() { void Rasterizer::BeginRendering() {
const auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
RenderState state; RenderState state;
@ -251,6 +255,17 @@ void Rasterizer::BeginRendering() {
scheduler.BeginRendering(state); scheduler.BeginRendering(state);
} }
void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) {
buffer_cache.InlineDataToGds(gds_offset, value);
}
u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
auto* gds_buf = buffer_cache.GetGdsBuffer();
u32 value;
std::memcpy(&value, gds_buf->mapped_data.data() + gds_offset, sizeof(u32));
return value;
}
void Rasterizer::InvalidateMemory(VAddr addr, u64 size) { void Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
buffer_cache.InvalidateMemory(addr, size); buffer_cache.InvalidateMemory(addr, size);
texture_cache.InvalidateMemory(addr, size); texture_cache.InvalidateMemory(addr, size);

View File

@ -41,12 +41,15 @@ public:
void ScopeMarkerEnd(); void ScopeMarkerEnd();
void ScopedMarkerInsert(const std::string_view& str); void ScopedMarkerInsert(const std::string_view& str);
void InlineDataToGds(u32 gds_offset, u32 value);
u32 ReadDataFromGds(u32 gsd_offset);
void InvalidateMemory(VAddr addr, u64 size); void InvalidateMemory(VAddr addr, u64 size);
void MapMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size);
void UnmapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size);
void CpSync(); void CpSync();
u64 Flush(); u64 Flush();
void Finish();
private: private:
void BeginRendering(); void BeginRendering();

View File

@ -32,7 +32,6 @@ enum ImageFlagBits : u32 {
Registered = 1 << 6, ///< True when the image is registered Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked Picked = 1 << 7, ///< Temporary flag to mark the image as picked
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
Deleted = 1 << 9, ///< Indicates that images was marked for deletion once frame is done
}; };
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View File

@ -204,8 +204,8 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
tiling_mode = image.GetTilingMode(); tiling_mode = image.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
// Override format if image is forced to be a depth target // Override format if image is forced to be a depth target
if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { if (force_depth) {
if (pixel_format == vk::Format::eR32Sfloat) { if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) {
pixel_format = vk::Format::eD32SfloatS8Uint; pixel_format = vk::Format::eD32SfloatS8Uint;
} else if (pixel_format == vk::Format::eR16Unorm) { } else if (pixel_format == vk::Format::eR16Unorm) {
pixel_format = vk::Format::eD16UnormS8Uint; pixel_format = vk::Format::eD16UnormS8Uint;

View File

@ -128,6 +128,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
format = image.info.pixel_format; format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eDepth; aspect = vk::ImageAspectFlagBits::eDepth;
} }
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) {
format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eStencil;
}
const vk::ImageViewCreateInfo image_view_ci = { const vk::ImageViewCreateInfo image_view_ci = {
.pNext = usage_override ? &usage_ci : nullptr, .pNext = usage_override ? &usage_ci : nullptr,

View File

@ -40,17 +40,27 @@ TextureCache::~TextureCache() = default;
void TextureCache::InvalidateMemory(VAddr address, size_t size) { void TextureCache::InvalidateMemory(VAddr address, size_t size) {
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
const size_t image_dist = // Ensure image is reuploaded when accessed again.
image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr; image.flags |= ImageFlagBits::CpuModified;
if (image_dist < MaxInvalidateDist) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
}
// Untrack image, so the range is unprotected and the guest can write freely. // Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image_id); UntrackImage(image_id);
}); });
} }
void TextureCache::MarkWritten(VAddr address, size_t max_size) {
static constexpr FindFlags find_flags =
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
ImageInfo info{};
info.guest_address = address;
info.guest_size_bytes = max_size;
const ImageId image_id = FindImage(info, find_flags);
if (!image_id) {
return;
}
// Ensure image is copied when accessed again.
slot_images[image_id].flags |= ImageFlagBits::CpuModified;
}
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
std::scoped_lock lk{mutex}; std::scoped_lock lk{mutex};
@ -199,10 +209,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) {
continue; continue;
} }
ASSERT(cache_image.info.type == info.type); ASSERT(cache_image.info.type == info.type || True(flags & FindFlags::RelaxFmt));
image_id = cache_id; image_id = cache_id;
} }
if (True(flags & FindFlags::NoCreate) && !image_id) {
return {};
}
// Try to resolve overlaps (if any) // Try to resolve overlaps (if any)
if (!image_id) { if (!image_id) {
for (const auto& cache_id : image_ids) { for (const auto& cache_id : image_ids) {
@ -211,10 +225,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
} }
} }
if (True(flags & FindFlags::NoCreate) && !image_id) {
return {};
}
// Create and register a new image // Create and register a new image
if (!image_id) { if (!image_id) {
image_id = slot_images.insert(instance, scheduler, info); image_id = slot_images.insert(instance, scheduler, info);
@ -251,9 +261,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
const ImageId image_id = FindImage(info); const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (view_info.is_storage) {
image.flags |= ImageFlagBits::GpuModified;
}
UpdateImage(image_id); UpdateImage(image_id);
auto& usage = image.info.usage; auto& usage = image.info.usage;
@ -351,7 +358,6 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
if (False(image.flags & ImageFlagBits::CpuModified)) { if (False(image.flags & ImageFlagBits::CpuModified)) {
return; return;
} }
// Mark image as validated. // Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;
@ -485,8 +491,6 @@ void TextureCache::DeleteImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
image.flags |= ImageFlagBits::Deleted;
// Remove any registered meta areas. // Remove any registered meta areas.
const auto& meta_info = image.info.meta_info; const auto& meta_info = image.info.meta_info;
if (meta_info.cmask_addr) { if (meta_info.cmask_addr) {

View File

@ -50,6 +50,9 @@ public:
/// Invalidates any image in the logical page range. /// Invalidates any image in the logical page range.
void InvalidateMemory(VAddr address, size_t size); void InvalidateMemory(VAddr address, size_t size);
/// Marks an image as dirty if it exists at the provided address.
void MarkWritten(VAddr address, size_t max_size);
/// Evicts any images that overlap the unmapped range. /// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);