diff --git a/.github/workflows/linux-qt.yml b/.github/workflows/linux-qt.yml index 06e048c0e..6848f203b 100644 --- a/.github/workflows/linux-qt.yml +++ b/.github/workflows/linux-qt.yml @@ -25,8 +25,27 @@ jobs: run: > sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential qt6-base-dev qt6-tools-dev + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-builds + with: + append-timestamp: false + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel @@ -44,4 +63,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: shadps4-linux-qt-${{ steps.vars.outputs.date }}-${{ steps.vars.outputs.shorthash }} - path: Shadps4-qt.AppImage \ No newline at end of file + path: Shadps4-qt.AppImage diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ee1340984..d4402472a 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -25,8 +25,27 @@ jobs: run: > sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-builds + with: + append-timestamp: false + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel diff --git a/.github/workflows/macos-qt.yml b/.github/workflows/macos-qt.yml index f04d3091c..beb927a79 100644 --- a/.github/workflows/macos-qt.yml +++ b/.github/workflows/macos-qt.yml @@ -40,8 +40,29 @@ jobs: arch: clang_64 archives: qtbase qttools + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{runner.os}}-qt-cache-cmake-dependency-builds + with: + append-timestamp: false + create-symlink: true + key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + variant: sccache + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 0eb0ad17a..9526c6fd6 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -31,8 +31,29 @@ jobs: arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /usr/local/bin/brew install molten-vk + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{runner.os}}-sdl-cache-cmake-dependency-builds + with: + append-timestamp: false + create-symlink: true + key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + variant: sccache + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) diff --git a/.github/workflows/windows-qt.yml b/.github/workflows/windows-qt.yml index 83b1a908b..fee202b5c 100644 --- a/.github/workflows/windows-qt.yml +++ b/.github/workflows/windows-qt.yml @@ -30,6 +30,17 @@ jobs: arch: win64_msvc2019_64 archives: qtbase qttools + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL -DENABLE_QT_GUI=ON diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 413277927..4bea63b16 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -20,6 +20,17 @@ jobs: with: submodules: recursive + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL diff --git a/README.md b/README.md index db898e565..5cf307e33 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,20 @@ Open a PR and we'll check it :) + +# Special Thanks + +A few noteworthy teams/projects who've helped us along the way are: + +- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. They have been incredibly helpful in understanding and solving problems that came up from natively executing the x64 code of PS4 binaries + +- [**fpPS4**](https://github.com/red-prig/fpPS4): The fpPS4 team has assisted massively with understanding some of the more complex parts of the PS4 operating system and libraries, by helping with reverse engineering work and research. + +- **yuzu**: Our shader compiler has been designed with yuzu's Hades compiler as a blueprint. This allowed us to focus on the challenges of emulating a modern AMD GPU while having a high-quality optimizing shader compiler implementation as a base. + +- [**hydra**](https://github.com/hydra-emu/hydra): A multisystem, multiplatform emulator (chip-8, GB, NES, N64) from Paris. + + # Sister Projects - [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index f31ff18cb..91b3bcd40 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -315,14 +315,12 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); // BLSI sets CF to zero if source is zero, otherwise it sets CF to one. - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; c.mov(scratch, *src); c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise - c.jc(set_carry); - c.jmp(clear_carry); + c.jnc(clear_carry); - c.L(set_carry); c.and_(scratch, *src); c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF c.jmp(end); @@ -345,15 +343,13 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener SaveRegisters(c, {scratch}); - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; // BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); c.test(scratch, scratch); - c.jz(set_carry); - c.jmp(clear_carry); + c.jnz(clear_carry); - c.L(set_carry); c.dec(scratch); c.xor_(scratch, *src); c.stc(); @@ -378,15 +374,13 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; // BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); c.test(scratch, scratch); - c.jz(set_carry); - c.jmp(clear_carry); + c.jnz(clear_carry); - c.L(set_carry); c.dec(scratch); c.and_(scratch, *src); c.stc(); diff --git a/src/core/libraries/kernel/cpu_management.cpp b/src/core/libraries/kernel/cpu_management.cpp index 93dc60bd0..3bf609dfe 100644 --- a/src/core/libraries/kernel/cpu_management.cpp +++ b/src/core/libraries/kernel/cpu_management.cpp @@ -8,7 +8,7 @@ namespace Libraries::Kernel { int PS4_SYSV_ABI sceKernelIsNeoMode() { - LOG_INFO(Kernel_Sce, "called"); + LOG_DEBUG(Kernel_Sce, "called"); return Config::isNeoMode(); } diff --git a/src/core/libraries/kernel/event_flag/event_flag.cpp b/src/core/libraries/kernel/event_flag/event_flag.cpp index ec5d6ded2..4d3925127 100644 --- a/src/core/libraries/kernel/event_flag/event_flag.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag.cpp @@ -78,7 +78,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() { return ORBIS_OK; } int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) { - LOG_INFO(Kernel_Event, "called"); + LOG_DEBUG(Kernel_Event, "called"); ef->Clear(bitPattern); return ORBIS_OK; } @@ -97,7 +97,7 @@ int PS4_SYSV_ABI sceKernelSetEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) } int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, u32 waitMode, u64* pResultPat) { - LOG_INFO(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode); + LOG_DEBUG(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode); if (ef == nullptr) { return ORBIS_KERNEL_ERROR_ESRCH; diff --git a/src/core/libraries/kernel/libkernel.h b/src/core/libraries/kernel/libkernel.h index c28a548ff..73705cdc2 100644 --- a/src/core/libraries/kernel/libkernel.h +++ b/src/core/libraries/kernel/libkernel.h @@ -33,6 +33,8 @@ typedef struct { } OrbisKernelUuid; int* PS4_SYSV_ABI __Error(); +int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, + struct OrbisTimesec* st, unsigned long* dst_sec); int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver); void LibKernel_Register(Core::Loader::SymbolsResolver* sym); diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 919afcb47..8f97ed879 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -295,7 +295,7 @@ ScePthread PS4_SYSV_ABI scePthreadSelf() { int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, const /*SceKernelCpumask*/ u64 mask) { - LOG_INFO(Kernel_Pthread, "called"); + LOG_DEBUG(Kernel_Pthread, "called"); if (pattr == nullptr || *pattr == nullptr) { return SCE_KERNEL_ERROR_EINVAL; @@ -387,7 +387,7 @@ int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t st } int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask) { - LOG_INFO(Kernel_Pthread, "called"); + LOG_DEBUG(Kernel_Pthread, "called"); if (thread == nullptr) { return SCE_KERNEL_ERROR_ESRCH; diff --git a/src/core/libraries/kernel/time_management.h b/src/core/libraries/kernel/time_management.h index a28f8c133..a28e6e558 100644 --- a/src/core/libraries/kernel/time_management.h +++ b/src/core/libraries/kernel/time_management.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/types.h" namespace Core::Loader { @@ -50,7 +52,10 @@ u64 PS4_SYSV_ABI sceKernelGetProcessTime(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency(); u64 PS4_SYSV_ABI sceKernelReadTsc(); - +int PS4_SYSV_ABI sceKernelClockGettime(s32 clock_id, OrbisKernelTimespec* tp); +s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz); +int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, + OrbisKernelTimezone* timezone, int* dst_seconds); void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Kernel diff --git a/src/core/libraries/rtc/rtc.cpp b/src/core/libraries/rtc/rtc.cpp index 387a8558b..7a46a1e31 100644 --- a/src/core/libraries/rtc/rtc.cpp +++ b/src/core/libraries/rtc/rtc.cpp @@ -5,156 +5,827 @@ #include "common/logging/log.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" +#include "core/libraries/kernel/time_management.h" #include "core/libraries/libs.h" #include "rtc.h" #include "rtc_error.h" namespace Libraries::Rtc { -int PS4_SYSV_ABI sceRtcCheckValid() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcCheckValid(OrbisRtcDateTime* pTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (pTime->year == 0 || pTime->year > 9999) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + if (pTime->month == 0 || pTime->month > 12) + return ORBIS_RTC_ERROR_INVALID_MONTH; + + if (pTime->day == 0) + return ORBIS_RTC_ERROR_INVALID_DAY; + + using namespace std::chrono; + year chronoYear = year(pTime->year); + month chronoMonth = month(pTime->month); + int lastDay = + static_cast(unsigned(year_month_day_last{chronoYear / chronoMonth / last}.day())); + + if (pTime->day > lastDay) + return ORBIS_RTC_ERROR_INVALID_DAY; + + if (pTime->hour >= 24) + return ORBIS_RTC_ERROR_INVALID_HOUR; + + if (pTime->minute >= 60) + return ORBIS_RTC_ERROR_INVALID_MINUTE; + + if (pTime->second >= 60) + return ORBIS_RTC_ERROR_INVALID_SECOND; + + if (pTime->microsecond >= 1000000) + return ORBIS_RTC_ERROR_INVALID_MICROSECOND; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcCompareTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcCompareTick(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (pTick1->tick <= pTick2->tick) + return 1; + else + return 0; + + return ORBIS_FAIL; } -int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(OrbisRtcTick* pTickLocal, OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickLocal == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + time_t seconds; + Kernel::OrbisKernelTimezone timezone; + + int convertValue = Kernel::sceKernelConvertLocaltimeToUtc( + (pTickLocal->tick - UNIX_EPOCH_TICKS) / 1000000, 0xffffffff, &seconds, &timezone, 0); + + if (convertValue >= 0) { + convertValue = sceRtcTickAddMinutes( + pTickUtc, pTickLocal, -(((timezone.tz_dsttime * 60) - timezone.tz_minuteswest))); + } + + return convertValue; } -int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(OrbisRtcTick* pTickUtc, OrbisRtcTick* pTickLocal) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimezone timeZone; + int returnValue = Kernel::sceKernelGettimezone(&timeZone); + + sceRtcTickAddMinutes(pTickLocal, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); + + return 0; } int PS4_SYSV_ABI sceRtcEnd() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC2822() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC2822(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcTick formatTick; + + if (pTickUtc == nullptr) { + sceRtcGetCurrentTick(&formatTick); + } else { + formatTick.tick = pTickUtc->tick; + } + + sceRtcTickAddMinutes(&formatTick, &formatTick, iTimeZoneMinutes); + + OrbisRtcDateTime formatTime; + sceRtcSetTick(&formatTime, &formatTick); + + int validTime = sceRtcCheckValid(&formatTime); + + std::string formattedString; + + if (validTime >= 0) { + int weekDay = sceRtcGetDayOfWeek(formatTime.year, formatTime.month, formatTime.day); + switch (weekDay) { + case 0: + formattedString = "Sun, "; + break; + case 1: + formattedString = "Mon, "; + break; + case 2: + formattedString = "Tue, "; + break; + case 3: + formattedString = "Wed, "; + break; + case 4: + formattedString = "Thu, "; + break; + case 5: + formattedString = "Fri, "; + break; + case 6: + formattedString = "Sat, "; + break; + } + + if (formatTime.day < 10) { + formattedString += "0" + std::to_string(formatTime.day) + " "; + } else { + formattedString += std::to_string(formatTime.day) + " "; + } + + switch (formatTime.month) { + case 1: + formattedString += "Jan "; + break; + case 2: + formattedString += "Feb "; + break; + case 3: + formattedString += "Mar "; + break; + case 4: + formattedString += "Apr "; + break; + case 5: + formattedString += "May "; + break; + case 6: + formattedString += "Jun "; + break; + case 7: + formattedString += "Jul "; + break; + case 8: + formattedString += "Aug "; + break; + case 9: + formattedString += "Sep "; + break; + case 10: + formattedString += "Oct "; + break; + case 11: + formattedString += "Nov "; + break; + case 12: + formattedString += "Dec "; + break; + } + + formattedString += std::to_string(formatTime.year) + " "; + + if (formatTime.hour < 10) { + formattedString += "0" + std::to_string(formatTime.hour) + ":"; + } else { + formattedString += std::to_string(formatTime.hour) + ":"; + } + + if (formatTime.minute < 10) { + formattedString += "0" + std::to_string(formatTime.minute) + ":"; + } else { + formattedString += std::to_string(formatTime.minute) + ":"; + } + + if (formatTime.second < 10) { + formattedString += "0" + std::to_string(formatTime.second) + " "; + } else { + formattedString += std::to_string(formatTime.second) + " "; + } + + if (iTimeZoneMinutes == 0) { + formattedString += "+0000"; + } else { + int timeZoneHours = iTimeZoneMinutes / 60; + int timeZoneRemainder = iTimeZoneMinutes % 60; + + if (timeZoneHours < 0) { + formattedString += "-"; + timeZoneHours *= -1; + } else { + formattedString += "+"; + } + + if (timeZoneHours < 10) { + formattedString += "0" + std::to_string(timeZoneHours); + } else { + formattedString += std::to_string(timeZoneHours); + } + + if (timeZoneRemainder == 0) { + formattedString += "00"; + } else { + if (timeZoneRemainder < 0) + timeZoneRemainder *= -1; + formattedString += std::to_string(timeZoneRemainder); + } + } + + for (int i = 0; i < formattedString.size() + 1; ++i) { + pszDateTime[i] = formattedString.c_str()[i]; + } + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC2822(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcFormatRFC3339() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + return sceRtcFormatRFC3339Precise(pszDateTime, pTickUtc, iTimeZoneMinutes); } -int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC3339(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcFormatRFC3339Precise() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcTick formatTick; + + if (pTickUtc == nullptr) { + sceRtcGetCurrentTick(&formatTick); + } else { + formatTick.tick = pTickUtc->tick; + } + + sceRtcTickAddMinutes(&formatTick, &formatTick, iTimeZoneMinutes); + + OrbisRtcDateTime formatTime; + + sceRtcSetTick(&formatTime, &formatTick); + + std::string formattedString; + formattedString = std::to_string(formatTime.year) + "-"; + + if (formatTime.month < 10) { + formattedString += "0" + std::to_string(formatTime.month) + "-"; + } else { + formattedString += std::to_string(formatTime.month) + "-"; + } + + if (formatTime.day < 10) { + formattedString += "0" + std::to_string(formatTime.day) + "T"; + } else { + formattedString += std::to_string(formatTime.day) + "T"; + } + + if (formatTime.hour < 10) { + formattedString += "0" + std::to_string(formatTime.hour) + ":"; + } else { + formattedString += std::to_string(formatTime.hour) + ":"; + } + + if (formatTime.minute < 10) { + formattedString += "0" + std::to_string(formatTime.minute) + ":"; + } else { + formattedString += std::to_string(formatTime.minute) + ":"; + } + + if (formatTime.second < 10) { + formattedString += "0" + std::to_string(formatTime.second); + } else { + formattedString += std::to_string(formatTime.second); + } + + if (formatTime.microsecond != 0) { + formattedString += "." + std::to_string(formatTime.microsecond / 1000).substr(0, 2); + } else { + formattedString += ".00"; + } + + if (iTimeZoneMinutes == 0) { + formattedString += "Z"; + } else { + int timeZoneHours = iTimeZoneMinutes / 60; + int timeZoneRemainder = iTimeZoneMinutes % 60; + + if (timeZoneHours < 0) { + formattedString += "-"; + timeZoneHours *= -1; + } else { + formattedString += "+"; + } + + if (timeZoneHours < 10) { + formattedString += "0" + std::to_string(timeZoneHours); + } else { + formattedString += std::to_string(timeZoneHours); + } + + if (timeZoneRemainder == 0) { + formattedString += ":00"; + } else { + if (timeZoneRemainder < 0) + timeZoneRemainder *= -1; + formattedString += ":" + std::to_string(timeZoneRemainder); + } + } + + for (int i = 0; i < formattedString.size() + 1; ++i) { + pszDateTime[i] = formattedString.c_str()[i]; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(char* pszDateTime, + const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC3339Precise(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentClock() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentClock(OrbisRtcDateTime* pTime, int timeZone) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + OrbisRtcTick clockTick; + clockTick.tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + + sceRtcTickAddMinutes(&clockTick, &clockTick, timeZone); + sceRtcSetTick(pTime, &clockTick); + } + + return returnValue; } -int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(OrbisRtcDateTime* pTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimezone timeZone; + int returnValue = Kernel::sceKernelGettimezone(&timeZone); + + if (returnValue >= 0) { + Kernel::OrbisKernelTimespec clocktime; + + // calculate total timezone offset for converting UTC to local time + uint64_t tzOffset = -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60)); + + if (returnValue >= 0) { + OrbisRtcTick newTick; + sceRtcGetCurrentTick(&newTick); + sceRtcTickAddMinutes(&newTick, &newTick, tzOffset); + sceRtcSetTick(pTime, &newTick); + } + } + + return returnValue; } -int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick) { - pTick->tick = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - return ORBIS_OK; + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue >= 0) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetDayOfWeek() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDayOfWeek(int year, int month, int day) { + LOG_TRACE(Lib_Rtc, "called"); + + int sdk_version = 0; + int sdkResult = Kernel::sceKernelGetCompiledSdkVersion(&sdk_version); + if (sdkResult != ORBIS_OK) { + sdk_version = 0; + } + + if (sdk_version < 0x3000000) { + if (year < 1) { + return ORBIS_RTC_ERROR_INVALID_YEAR; + } + if (month > 12 || month <= 0) { + return ORBIS_RTC_ERROR_INVALID_MONTH; + } + } else { + if (year > 9999 || year < 1) { + return ORBIS_RTC_ERROR_INVALID_YEAR; + } + if (month > 12 || month <= 0) { + return ORBIS_RTC_ERROR_INVALID_MONTH; + } + } + + int daysInMonth = sceRtcGetDaysInMonth(year, month); + + if (day <= 0 || day > daysInMonth) + return ORBIS_RTC_ERROR_INVALID_DAY; + + std::chrono::sys_days chrono_time{std::chrono::year(year) / std::chrono::month(month) / + std::chrono::day(day)}; + std::chrono::weekday chrono_weekday{chrono_time}; + + return chrono_weekday.c_encoding(); } -int PS4_SYSV_ABI sceRtcGetDaysInMonth() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDaysInMonth(int year, int month) { + LOG_TRACE(Lib_Rtc, "called"); + + if (year <= 0) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + if (month <= 0 || month > 12) + return ORBIS_RTC_ERROR_INVALID_MONTH; + + std::chrono::year chronoYear = std::chrono::year(year); + std::chrono::month chronoMonth = std::chrono::month(month); + int lastDay = static_cast(unsigned( + std::chrono::year_month_day_last{chronoYear / chronoMonth / std::chrono::last}.day())); + + return lastDay; } -int PS4_SYSV_ABI sceRtcGetDosTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDosTime(OrbisRtcDateTime* pTime, unsigned int* dosTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || dosTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + *dosTime |= (pTime->second / 2) & 0x1F; + *dosTime |= (pTime->minute & 0x3F) << 5; + *dosTime |= (pTime->hour & 0x1F) << 11; + *dosTime |= (pTime->day & 0x1F) << 16; + *dosTime |= (pTime->month & 0x0F) << 21; + *dosTime |= ((pTime->year - 1980) & 0x7F) << 25; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isTimeValid = sceRtcCheckValid(pTime); + if (isTimeValid != 0) + return isTimeValid; + + if (pTime->month > 2) { + pTime->month -= 3; + } else { + pTime->month += 9; + pTime->year -= 1; + } + + int c = pTime->year / 100; + int ya = pTime->year - 100 * c; + + u64 days; + u64 msec; + + days = ((146097 * c) >> 2) + ((1461 * ya) >> 2) + (153 * pTime->month + 2) / 5 + pTime->day; + days -= 307; + days *= 86400000000; + + msec = pTime->hour * 3600000000 + pTime->minute * 60000000 + pTime->second * 1000000 + + pTime->microsecond; + + pTick->tick = days + msec; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetTickResolution() { +unsigned int PS4_SYSV_ABI sceRtcGetTickResolution() { + LOG_TRACE(Lib_Rtc, "called"); + return 1000000; } -int PS4_SYSV_ABI sceRtcGetTime_t() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetTime_t(OrbisRtcDateTime* pTime, time_t* llTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || llTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + OrbisRtcTick timeTick; + sceRtcGetTick(pTime, &timeTick); + + if (timeTick.tick < UNIX_EPOCH_TICKS) { + *llTime = 0; + } else { + *llTime = (timeTick.tick - UNIX_EPOCH_TICKS) / 1000000; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetWin32FileTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetWin32FileTime(OrbisRtcDateTime* pTime, uint64_t* ulWin32Time) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || ulWin32Time == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + OrbisRtcTick timeTick; + sceRtcGetTick(pTime, &timeTick); + + if (timeTick.tick < WIN32_FILETIME_EPOCH_TICKS) { + *ulWin32Time = 0; + } else { + *ulWin32Time = (timeTick.tick - WIN32_FILETIME_EPOCH_TICKS) * 10; + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcInit() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcIsLeapYear() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcIsLeapYear(int yearInt) { + LOG_TRACE(Lib_Rtc, "called"); + + if (yearInt < 1) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + using namespace std::chrono; + + year_month_day_last ymdl{year(yearInt) / February / last}; + return (ymdl.day() == 29d); } -int PS4_SYSV_ABI sceRtcParseDateTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int GetMonthFromString(std::string monthStr) { + if (monthStr == "Jan") + return 1; + + if (monthStr == "Feb") + return 2; + + if (monthStr == "Mar") + return 3; + + if (monthStr == "Apr") + return 4; + + if (monthStr == "May") + return 5; + + if (monthStr == "Jun") + return 6; + + if (monthStr == "Jul") + return 7; + + if (monthStr == "Aug") + return 8; + + if (monthStr == "Sep") + return 9; + + if (monthStr == "Oct") + return 10; + + if (monthStr == "Nov") + return 11; + + if (monthStr == "Dec") + return 12; + + return 1; } -int PS4_SYSV_ABI sceRtcParseRFC3339() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcParseDateTime(OrbisRtcTick* pTickUtc, const char* pszDateTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr || pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + std::string dateTimeString = std::string(pszDateTime); + + char formatKey = dateTimeString[22]; + OrbisRtcDateTime dateTime; + + if (formatKey == 'Z' || formatKey == '-' || formatKey == '+') { + // RFC3339 + sceRtcParseRFC3339(pTickUtc, pszDateTime); + } else if (formatKey == ':') { + // RFC2822 + dateTime.day = std::stoi(dateTimeString.substr(5, 2)); + dateTime.month = GetMonthFromString(dateTimeString.substr(8, 3)); + dateTime.year = std::stoi(dateTimeString.substr(12, 4)); + dateTime.hour = std::stoi(dateTimeString.substr(17, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(20, 2)); + dateTime.second = std::stoi(dateTimeString.substr(23, 2)); + dateTime.microsecond = 0; + + sceRtcGetTick(&dateTime, pTickUtc); + + if (dateTimeString[26] == '+') { + int timeZoneOffset = std::stoi(dateTimeString.substr(27, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(29, 2)); + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } else if (dateTimeString[26] == '-') { + int timeZoneOffset = std::stoi(dateTimeString.substr(27, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(29, 2)); + timeZoneOffset *= -1; + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } + + } else { + // asctime + dateTime.month = GetMonthFromString(dateTimeString.substr(4, 3)); + dateTime.day = std::stoi(dateTimeString.substr(8, 2)); + dateTime.hour = std::stoi(dateTimeString.substr(11, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(14, 2)); + dateTime.second = std::stoi(dateTimeString.substr(17, 2)); + dateTime.year = std::stoi(dateTimeString.substr(20, 4)); + dateTime.microsecond = 0; + + sceRtcGetTick(&dateTime, pTickUtc); + } + + return SCE_OK; +} + +int PS4_SYSV_ABI sceRtcParseRFC3339(OrbisRtcTick* pTickUtc, const char* pszDateTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr || pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + std::string dateTimeString = std::string(pszDateTime); + + OrbisRtcDateTime dateTime; + dateTime.year = std::stoi(dateTimeString.substr(0, 4)); + dateTime.month = std::stoi(dateTimeString.substr(5, 2)); + dateTime.day = std::stoi(dateTimeString.substr(8, 2)); + dateTime.hour = std::stoi(dateTimeString.substr(11, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(14, 2)); + dateTime.second = std::stoi(dateTimeString.substr(17, 2)); + dateTime.microsecond = std::stoi(dateTimeString.substr(20, 2)); + + sceRtcGetTick(&dateTime, pTickUtc); + + if (dateTimeString[22] != 'Z') { + if (dateTimeString[22] == '-') { + int timeZoneOffset = std::stoi(dateTimeString.substr(23, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(26, 2)); + timeZoneOffset *= -1; + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } else if (dateTimeString[22] == '+') { + int timeZoneOffset = std::stoi(dateTimeString.substr(23, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(26, 2)); + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcSetConf() { @@ -162,89 +833,294 @@ int PS4_SYSV_ABI sceRtcSetConf() { return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentTick() { +int PS4_SYSV_ABI sceRtcSetCurrentTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetDosTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetDosTime(OrbisRtcDateTime* pTime, u32 dosTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTime->microsecond = 0; + pTime->second = (dosTime << 1) & 0x3e; + pTime->minute = (dosTime >> 5) & 0x3f; + pTime->hour = (dosTime & 0xf800) >> 0xb; + + int16_t days = dosTime >> 0x10; + + pTime->day = days & 0x1f; + pTime->month = (days >> 5) & 0x0f; + pTime->year = (days >> 9) + 1980; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + u32 ly, ld, lm, j; + u64 days, msec; + + days = pTick->tick / 86400000000; + msec = pTick->tick % 86400000000; + + days += 307; + + j = (days << 2) - 1; + ly = j / 146097; + + j -= (146097 * ly); + ld = j >> 2; + + j = ((ld << 2) + 3) / 1461; + ld = (((ld << 2) + 7) - 1461 * j) >> 2; + + lm = (5 * ld - 3) / 153; + ld = (5 * ld + 2 - 153 * lm) / 5; + ly = 100 * ly + j; + + if (lm < 10) { + lm += 3; + } else { + lm -= 9; + ly++; + } + + pTime->year = ly; + pTime->month = lm; + pTime->day = ld; + + pTime->hour = msec / 3600000000; + msec %= 3600000000; + pTime->minute = msec / 60000000; + msec %= 60000000; + pTime->second = msec / 1000000; + msec %= 1000000; + pTime->microsecond = msec; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetTime_t() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetTime_t(OrbisRtcDateTime* pTime, time_t llTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int sdk_version; + int sdkResult = Kernel::sceKernelGetCompiledSdkVersion(&sdk_version); + if (sdkResult != ORBIS_OK) { + sdk_version = 0; + } + + OrbisRtcTick newTick; + if (sdk_version < 0x3000000) { + newTick.tick = (llTime & 0xffffffff) * 1000000; + } else { + if (llTime < 0) { + return ORBIS_RTC_ERROR_INVALID_VALUE; + } + newTick.tick = llTime * 1000000; + } + + newTick.tick += UNIX_EPOCH_TICKS; + sceRtcSetTick(pTime, &newTick); + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetWin32FileTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetWin32FileTime(OrbisRtcDateTime* pTime, int64_t ulWin32Time) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + u64 convertedTime = (ulWin32Time / 10) + WIN32_FILETIME_EPOCH_TICKS; + + OrbisRtcTick convertedTick; + convertedTick.tick = convertedTime; + + sceRtcSetTick(pTime, &convertedTick); + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddDays() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddDays(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 86400000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddHours() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddHours(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 3600000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMicroseconds() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMicroseconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, + int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = lAdd + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMinutes() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMinutes(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 60000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMonths() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMonths(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (lAdd == 0) { + pTick1->tick = pTick2->tick; + return SCE_OK; + } + + OrbisRtcDateTime time; + s64 s; + s64 tempMonth; + + sceRtcSetTick(&time, pTick1); + + if (lAdd >= 0) { + s = 1; + } else { + s = -1; + } + + time.year += (lAdd / 12); + tempMonth = time.month + (lAdd % 12) - 1; + + if (tempMonth > 11 || tempMonth < 0) { + tempMonth -= s * 12; + time.year += s; + } + + time.month = tempMonth + 1; + + using namespace std::chrono; + year chronoYear = year(time.year); + month chronoMonth = month(time.month); + int lastDay = + static_cast(unsigned(year_month_day_last{chronoYear / chronoMonth / last}.day())); + + if (time.day > lastDay) { + time.day = lastDay; + } + + int timeIsValid = sceRtcCheckValid(&time); + if (timeIsValid == SCE_OK) { + sceRtcGetTick(&time, pTick1); + } else { + return timeIsValid; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddSeconds() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddSeconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 1000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddTicks() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddTicks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = lAdd + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddWeeks() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddWeeks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 604800000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddYears() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddYears(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcDateTime time; + + if (lAdd == 0) { + pTick1->tick = pTick2->tick; + return SCE_OK; + } + + sceRtcSetTick(&time, pTick1); + + time.year += lAdd; + + int timeIsValid = sceRtcCheckValid(&time); + if (timeIsValid == SCE_OK) { + sceRtcGetTick(&time, pTick1); + } else { + return timeIsValid; + } + + return SCE_OK; } void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym) { diff --git a/src/core/libraries/rtc/rtc.h b/src/core/libraries/rtc/rtc.h index ee6afa70e..c41040863 100644 --- a/src/core/libraries/rtc/rtc.h +++ b/src/core/libraries/rtc/rtc.h @@ -11,57 +11,81 @@ class SymbolsResolver; namespace Libraries::Rtc { +constexpr int ORBIS_RTC_DAYOFWEEK_SUNDAY = 0; +constexpr int ORBIS_RTC_DAYOFWEEK_MONDAY = 1; +constexpr int ORBIS_RTC_DAYOFWEEK_TUESDAY = 2; +constexpr int ORBIS_RTC_DAYOFWEEK_WEDNESDAY = 3; +constexpr int ORBIS_RTC_DAYOFWEEK_THURSDAY = 4; +constexpr int ORBIS_RTC_DAYOFWEEK_FRIDAY = 5; +constexpr int ORBIS_RTC_DAYOFWEEK_SATURDAY = 6; + +constexpr int64_t UNIX_EPOCH_TICKS = 0xdcbffeff2bc000; +constexpr int64_t WIN32_FILETIME_EPOCH_TICKS = 0xb36168b6a58000; + struct OrbisRtcTick { - u64 tick; + uint64_t tick; }; -int PS4_SYSV_ABI sceRtcCheckValid(); -int PS4_SYSV_ABI sceRtcCompareTick(); -int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(); -int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(); +struct OrbisRtcDateTime { + uint16_t year; + uint16_t month; + uint16_t day; + uint16_t hour; + uint16_t minute; + uint16_t second; + uint32_t microsecond; +}; + +int PS4_SYSV_ABI sceRtcCheckValid(OrbisRtcDateTime* pTime); +int PS4_SYSV_ABI sceRtcCompareTick(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2); +int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(OrbisRtcTick* pTickLocal, OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(OrbisRtcTick* pTickUtc, OrbisRtcTick* pTickLocal); int PS4_SYSV_ABI sceRtcEnd(); -int PS4_SYSV_ABI sceRtcFormatRFC2822(); -int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(); -int PS4_SYSV_ABI sceRtcFormatRFC3339(); -int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(); -int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(); -int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(); -int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentClock(); -int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(); -int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(); +int PS4_SYSV_ABI sceRtcFormatRFC2822(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcFormatRFC3339(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(char* pszDateTime, + const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentClock(OrbisRtcDateTime* pTime, int timeZone); +int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(OrbisRtcDateTime* pTime); +int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(OrbisRtcTick* pTick); int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick); -int PS4_SYSV_ABI sceRtcGetDayOfWeek(); -int PS4_SYSV_ABI sceRtcGetDaysInMonth(); -int PS4_SYSV_ABI sceRtcGetDosTime(); -int PS4_SYSV_ABI sceRtcGetTick(); -int PS4_SYSV_ABI sceRtcGetTickResolution(); -int PS4_SYSV_ABI sceRtcGetTime_t(); -int PS4_SYSV_ABI sceRtcGetWin32FileTime(); +int PS4_SYSV_ABI sceRtcGetDayOfWeek(int year, int month, int day); +int PS4_SYSV_ABI sceRtcGetDaysInMonth(int year, int month); +int PS4_SYSV_ABI sceRtcGetDosTime(OrbisRtcDateTime* pTime, unsigned int* dosTime); +int PS4_SYSV_ABI sceRtcGetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick); +unsigned int PS4_SYSV_ABI sceRtcGetTickResolution(); +int PS4_SYSV_ABI sceRtcGetTime_t(OrbisRtcDateTime* pTime, time_t* llTime); +int PS4_SYSV_ABI sceRtcGetWin32FileTime(OrbisRtcDateTime* pTime, uint64_t* ulWin32Time); int PS4_SYSV_ABI sceRtcInit(); -int PS4_SYSV_ABI sceRtcIsLeapYear(); -int PS4_SYSV_ABI sceRtcParseDateTime(); -int PS4_SYSV_ABI sceRtcParseRFC3339(); +int PS4_SYSV_ABI sceRtcIsLeapYear(int yearInt); +int PS4_SYSV_ABI sceRtcParseDateTime(OrbisRtcTick* pTickUtc, const char* pszDateTime); +int PS4_SYSV_ABI sceRtcParseRFC3339(OrbisRtcTick* pTickUtc, const char* pszDateTime); int PS4_SYSV_ABI sceRtcSetConf(); -int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentTick(); -int PS4_SYSV_ABI sceRtcSetDosTime(); -int PS4_SYSV_ABI sceRtcSetTick(); -int PS4_SYSV_ABI sceRtcSetTime_t(); -int PS4_SYSV_ABI sceRtcSetWin32FileTime(); -int PS4_SYSV_ABI sceRtcTickAddDays(); -int PS4_SYSV_ABI sceRtcTickAddHours(); -int PS4_SYSV_ABI sceRtcTickAddMicroseconds(); -int PS4_SYSV_ABI sceRtcTickAddMinutes(); -int PS4_SYSV_ABI sceRtcTickAddMonths(); -int PS4_SYSV_ABI sceRtcTickAddSeconds(); -int PS4_SYSV_ABI sceRtcTickAddTicks(); -int PS4_SYSV_ABI sceRtcTickAddWeeks(); -int PS4_SYSV_ABI sceRtcTickAddYears(); +int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetDosTime(OrbisRtcDateTime* pTime, u32 dosTime); +int PS4_SYSV_ABI sceRtcSetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetTime_t(OrbisRtcDateTime* pTime, time_t llTime); +int PS4_SYSV_ABI sceRtcSetWin32FileTime(OrbisRtcDateTime* pTime, int64_t ulWin32Time); +int PS4_SYSV_ABI sceRtcTickAddDays(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddHours(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMicroseconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, + int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMinutes(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMonths(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddSeconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddTicks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddWeeks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddYears(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Rtc \ No newline at end of file diff --git a/src/core/libraries/rtc/rtc_error.h b/src/core/libraries/rtc/rtc_error.h index 04eecbbdf..3af5a68fd 100644 --- a/src/core/libraries/rtc/rtc_error.h +++ b/src/core/libraries/rtc/rtc_error.h @@ -3,6 +3,7 @@ #pragma once +constexpr int ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED = 0x7FFEF9FE; constexpr int ORBIS_RTC_ERROR_INVALID_PARAMETER = 0x80010602; constexpr int ORBIS_RTC_ERROR_INVALID_TICK_PARAMETER = 0x80010603; constexpr int ORBIS_RTC_ERROR_INVALID_DATE_PARAMETER = 0x80010604; @@ -14,4 +15,18 @@ constexpr int ORBIS_RTC_ERROR_INVALID_DAYS_PARAMETER = 0x80010623; constexpr int ORBIS_RTC_ERROR_INVALID_HOURS_PARAMETER = 0x80010624; constexpr int ORBIS_RTC_ERROR_INVALID_MINUTES_PARAMETER = 0x80010625; constexpr int ORBIS_RTC_ERROR_INVALID_SECONDS_PARAMETER = 0x80010626; -constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; \ No newline at end of file +constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; +constexpr int ORBIS_RTC_ERROR_NOT_INITIALIZED = 0x80B50001; +constexpr int ORBIS_RTC_ERROR_INVALID_POINTER = 0x80B50002; +constexpr int ORBIS_RTC_ERROR_INVALID_VALUE = 0x80B50003; +constexpr int ORBIS_RTC_ERROR_INVALID_ARG = 0x80B50004; +constexpr int ORBIS_RTC_ERROR_NOT_SUPPORTED = 0x80B50005; +constexpr int ORBIS_RTC_ERROR_NO_CLOCK = 0x80B50006; +constexpr int ORBIS_RTC_ERROR_BAD_PARSE = 0x80B50007; +constexpr int ORBIS_RTC_ERROR_INVALID_YEAR = 0x80B50008; +constexpr int ORBIS_RTC_ERROR_INVALID_MONTH = 0x80B50009; +constexpr int ORBIS_RTC_ERROR_INVALID_DAY = 0x80B5000A; +constexpr int ORBIS_RTC_ERROR_INVALID_HOUR = 0x80B5000B; +constexpr int ORBIS_RTC_ERROR_INVALID_MINUTE = 0x80B5000C; +constexpr int ORBIS_RTC_ERROR_INVALID_SECOND = 0x80B5000D; +constexpr int ORBIS_RTC_ERROR_INVALID_MICROSECOND = 0x80B5000E; \ No newline at end of file diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 959a75705..779c922e6 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -179,15 +179,21 @@ int PS4_SYSV_ABI sceSaveDataDeleteUser() { int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* cond, OrbisSaveDataDirNameSearchResult* result) { - if (cond == nullptr) + if (cond == nullptr || result == nullptr) return ORBIS_SAVE_DATA_ERROR_PARAMETER; - LOG_INFO(Lib_SaveData, "called"); + LOG_INFO(Lib_SaveData, "Number of directories = {}", result->dirNamesNum); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / std::to_string(cond->userId) / game_serial; if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) { - if (cond->dirName == nullptr || std::string_view(cond->dirName->data) - .empty()) { // look for all dirs if no dir is provided. - for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + int maxDirNum = result->dirNamesNum; // Games set a maximum of directories to search for + int i = 0; + + if (cond->dirName == nullptr || std::string_view(cond->dirName->data).empty()) { + // Look for all dirs if no dir is provided. + for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + if (i >= maxDirNum) + break; + if (std::filesystem::is_directory(entry.path()) && entry.path().filename().string() != "sdmemory") { // sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs. @@ -199,13 +205,50 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* result->setNum = i; } } - } else { // Need a game to test. - LOG_ERROR(Lib_SaveData, "Check Me. sceSaveDataDirNameSearch: dirName = {}", - cond->dirName->data); - strncpy(result->dirNames[0].data, cond->dirName->data, 32); - result->hitNum = 1; - result->dirNamesNum = 1; - result->setNum = 1; + } else { + // Game checks for a specific directory. + LOG_INFO(Lib_SaveData, "dirName = {}", cond->dirName->data); + + // Games can pass '%' as a wildcard + // e.g. `SAVELIST%` searches for all folders with names starting with `SAVELIST` + std::string baseName(cond->dirName->data); + u64 wildcardPos = baseName.find('%'); + if (wildcardPos != std::string::npos) { + baseName = baseName.substr(0, wildcardPos); + } + + for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + if (i >= maxDirNum) + break; + + if (std::filesystem::is_directory(entry.path())) { + std::string dirName = entry.path().filename().string(); + + if (wildcardPos != std::string::npos) { + if (dirName.compare(0, baseName.size(), baseName) != 0) { + continue; + } + } else if (wildcardPos == std::string::npos && dirName != cond->dirName->data) { + continue; + } + + strncpy(result->dirNames[i].data, cond->dirName->data, 32); + + i++; + result->hitNum = i; + result->dirNamesNum = i; + result->setNum = i; + } + } + } + + if (result->params != nullptr) { + Common::FS::IOFile file(mount_dir / cond->dirName->data / "param.txt", + Common::FS::FileAccessMode::Read); + if (file.IsOpen()) { + file.ReadRaw((void*)result->params, sizeof(OrbisSaveDataParam)); + file.Close(); + } } } else { result->hitNum = 0; diff --git a/src/emulator.cpp b/src/emulator.cpp index 9d1bb00d9..a469a31ce 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -195,7 +195,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", nullptr}, {"libSceUlt.sprx", nullptr}, @@ -204,7 +204,8 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file) { {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, - {"libSceJpegEnc.sprx", nullptr}}, + {"libSceJpegEnc.sprx", nullptr}, + {"libSceFont.sprx", nullptr}}, }; std::vector found_modules; diff --git a/src/images/play_icon.png b/src/images/play_icon.png index c67831a1e..2815be39d 100644 Binary files a/src/images/play_icon.png and b/src/images/play_icon.png differ diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 93969100d..bd2f097ea 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -88,6 +88,7 @@ void MainWindow::AddUiWidgets() { ui->toolBar->addWidget(ui->playButton); ui->toolBar->addWidget(ui->pauseButton); ui->toolBar->addWidget(ui->stopButton); + ui->toolBar->addWidget(ui->refreshButton); ui->toolBar->addWidget(ui->settingsButton); ui->toolBar->addWidget(ui->controllerButton); QFrame* line = new QFrame(this); @@ -177,6 +178,7 @@ void MainWindow::CreateConnects() { connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); connect(ui->exitAct, &QAction::triggered, this, &QWidget::close); connect(ui->refreshGameListAct, &QAction::triggered, this, &MainWindow::RefreshGameTable); + connect(ui->refreshButton, &QPushButton::clicked, this, &MainWindow::RefreshGameTable); connect(ui->showGameListAct, &QAction::triggered, this, &MainWindow::ShowGameList); connect(this, &MainWindow::ExtractionFinished, this, &MainWindow::RefreshGameTable); @@ -852,6 +854,7 @@ void MainWindow::SetUiIcons(bool isWhite) { ui->playButton->setIcon(RecolorIcon(ui->playButton->icon(), isWhite)); ui->pauseButton->setIcon(RecolorIcon(ui->pauseButton->icon(), isWhite)); ui->stopButton->setIcon(RecolorIcon(ui->stopButton->icon(), isWhite)); + ui->refreshButton->setIcon(RecolorIcon(ui->refreshButton->icon(), isWhite)); ui->settingsButton->setIcon(RecolorIcon(ui->settingsButton->icon(), isWhite)); ui->controllerButton->setIcon(RecolorIcon(ui->controllerButton->icon(), isWhite)); ui->refreshGameListAct->setIcon(RecolorIcon(ui->refreshGameListAct->icon(), isWhite)); diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 0acfade0e..8ae5965f8 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -38,6 +38,7 @@ public: QPushButton* playButton; QPushButton* pauseButton; QPushButton* stopButton; + QPushButton* refreshButton; QPushButton* settingsButton; QPushButton* controllerButton; @@ -176,6 +177,10 @@ public: stopButton->setFlat(true); stopButton->setIcon(QIcon(":images/stop_icon.png")); stopButton->setIconSize(QSize(40, 40)); + refreshButton = new QPushButton(centralWidget); + refreshButton->setFlat(true); + refreshButton->setIcon(QIcon(":images/refresh_icon.png")); + refreshButton->setIconSize(QSize(32, 32)); settingsButton = new QPushButton(centralWidget); settingsButton->setFlat(true); settingsButton->setIcon(QIcon(":images/settings_icon.png")); diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index b83afd299..31460d07c 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -194,11 +194,6 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { ax = Input::GetAxis(-0x80, 0x80, axisvalue); break; case SDLK_S: - if (event->key.mod == SDL_KMOD_LCTRL) { - // Trigger rdoc capture - VideoCore::TriggerCapture(); - break; - } axis = Input::Axis::LeftY; if (event->type == SDL_EVENT_KEY_DOWN) { axisvalue += 127; @@ -287,6 +282,12 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { } } break; + case SDLK_F12: + if (event->type == SDL_EVENT_KEY_DOWN) { + // Trigger rdoc capture + VideoCore::TriggerCapture(); + } + break; default: break; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c681be97c..b0298cbb0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -327,6 +327,10 @@ void EmitGetVccHi(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } +void EmitGetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + void EmitSetScc(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } @@ -351,4 +355,8 @@ void EmitSetVccHi(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } +void EmitSetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 1d553dc56..a58b2778f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -152,4 +152,20 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange); } +Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) { + auto& buffer = ctx.buffers[binding]; + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, + ctx.ConstU32(gds_addr)); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics); +} + +Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) { + auto& buffer = ctx.buffers[binding]; + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, + ctx.ConstU32(gds_addr)); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 39a214fa0..64ce532b5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -133,10 +133,6 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { return ctx.OpLoad(buffer.data_types->Get(1), ptr); } -Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) { - return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index)); -} - Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { return ctx.OpLoad( ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), @@ -222,12 +218,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); } -Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - return EmitLoadBufferF32(ctx, inst, handle, address); -} - template -static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { +static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); @@ -246,20 +238,20 @@ static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { } } -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<1>(ctx, handle, address); +Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<1>(ctx, handle, address); } -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<2>(ctx, handle, address); +Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<2>(ctx, handle, address); } -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<3>(ctx, handle, address); +Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<3>(ctx, handle, address); } -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<4>(ctx, handle, address); +Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<4>(ctx, handle, address); } Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { @@ -275,7 +267,7 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr } template -static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { +static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) { auto& buffer = ctx.buffers[handle]; address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); @@ -287,29 +279,25 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i); - ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i)); + ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i)); } } } -void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<1>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<2>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<3>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<4>(ctx, handle, address, value); -} - void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<1>(ctx, handle, address, value); + EmitStoreBufferU32xN<1>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<2>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<3>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<4>(ctx, handle, address, value); } void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index ce4d3f137..e506ced3a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -36,12 +36,14 @@ void EmitGetVcc(EmitContext& ctx); void EmitGetSccLo(EmitContext& ctx); void EmitGetVccLo(EmitContext& ctx); void EmitGetVccHi(EmitContext& ctx); +void EmitGetM0(EmitContext& ctx); void EmitSetScc(EmitContext& ctx); void EmitSetExec(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx); void EmitSetSccLo(EmitContext& ctx); void EmitSetVccLo(EmitContext& ctx); void EmitSetVccHi(EmitContext& ctx); +void EmitSetM0(EmitContext& ctx); void EmitFPCmpClass32(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); @@ -62,25 +64,16 @@ void EmitGetGotoVariable(EmitContext& ctx); void EmitSetScc(EmitContext& ctx); Id EmitReadConst(EmitContext& ctx); Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index); -Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index); -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); @@ -404,12 +397,13 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); - Id EmitLaneId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitReadFirstLane(EmitContext& ctx, Id value); Id EmitReadLane(EmitContext& ctx, Id value, u32 lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); +Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); +Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c0f0fa274..d01c1977a 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -43,6 +43,10 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_MIN_U32(inst, false, true); case Opcode::DS_MAX_RTN_U32: return DS_MAX_U32(inst, false, true); + case Opcode::DS_APPEND: + return DS_APPEND(inst); + case Opcode::DS_CONSUME: + return DS_CONSUME(inst); default: LogMissingOpcode(inst); } @@ -192,4 +196,18 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) { ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); } +void Translator::DS_APPEND(const GcnInst& inst) { + const u32 inst_offset = inst.control.ds.offset0; + const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); + const IR::U32 prev = ir.DataAppend(gds_offset); + SetDst(inst.dst[0], prev); +} + +void Translator::DS_CONSUME(const GcnInst& inst) { + const u32 inst_offset = inst.control.ds.offset0; + const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); + const IR::U32 prev = ir.DataConsume(gds_offset); + SetDst(inst.dst[0], prev); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index d4db09a64..18e830f7b 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -31,6 +31,12 @@ void Translator::EmitExport(const GcnInst& inst) { case MrtSwizzle::Alt: static constexpr std::array AltSwizzle = {2, 1, 0, 3}; return AltSwizzle[comp]; + case MrtSwizzle::Reverse: + static constexpr std::array RevSwizzle = {3, 2, 1, 0}; + return RevSwizzle[comp]; + case MrtSwizzle::ReverseAlt: + static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; + return AltRevSwizzle[comp]; default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index af258cd19..adc127f12 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -73,9 +73,13 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { case Opcode::S_SUB_I32: return S_SUB_U32(inst); case Opcode::S_MIN_U32: - return S_MIN_U32(inst); + return S_MIN_U32(false, inst); + case Opcode::S_MIN_I32: + return S_MIN_U32(true, inst); case Opcode::S_MAX_U32: - return S_MAX_U32(inst); + return S_MAX_U32(false, inst); + case Opcode::S_MAX_I32: + return S_MAX_U32(true, inst); case Opcode::S_WQM_B64: break; default: @@ -533,18 +537,18 @@ void Translator::S_ADDC_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); } -void Translator::S_MAX_U32(const GcnInst& inst) { +void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.UMax(src0, src1); + const IR::U32 result = ir.IMax(src0, src1, is_signed); SetDst(inst.dst[0], result); ir.SetScc(ir.IEqual(result, src0)); } -void Translator::S_MIN_U32(const GcnInst& inst) { +void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.UMin(src0, src1); + const IR::U32 result = ir.IMin(src0, src1, is_signed); SetDst(inst.dst[0], result); ir.SetScc(ir.IEqual(result, src0)); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index b33746c7b..4e0c110c2 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -153,10 +153,11 @@ T Translator::GetSrc(const InstOperand& operand) { break; case OperandField::M0: if constexpr (is_float) { - UNREACHABLE(); + value = ir.BitCast(ir.GetM0()); } else { - return m0_value; + value = ir.GetM0(); } + break; default: UNREACHABLE(); } @@ -296,8 +297,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { case OperandField::VccHi: return ir.SetVccHi(result); case OperandField::M0: - m0_value = result; - break; + return ir.SetM0(result); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 0c1f3a587..e4be298ea 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -101,8 +101,8 @@ public: void S_ADDC_U32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst); - void S_MAX_U32(const GcnInst& inst); - void S_MIN_U32(const GcnInst& inst); + void S_MAX_U32(bool is_signed, const GcnInst& inst); + void S_MIN_U32(bool is_signed, const GcnInst& inst); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); // Scalar Memory @@ -173,7 +173,7 @@ public: void V_BCNT_U32_B32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst); void V_MAX3_F32(const GcnInst& inst); - void V_MAX3_U32(const GcnInst& inst); + void V_MAX3_U32(bool is_signed, const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst); @@ -192,6 +192,9 @@ public: void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); void V_BFM_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); + void V_MOVRELS_B32(const GcnInst& inst); + void V_MOVRELD_B32(const GcnInst& inst); + void V_MOVRELSD_B32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst); @@ -214,6 +217,8 @@ public: void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); + void DS_APPEND(const GcnInst& inst); + void DS_CONSUME(const GcnInst& inst); void S_BARRIER(); // MIMG @@ -233,6 +238,9 @@ private: void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); + IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); + void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); + void LogMissingOpcode(const GcnInst& inst); private: @@ -240,7 +248,6 @@ private: Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - IR::U32 m0_value; bool opcode_missing = false; }; diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index a07e70785..b4470ee39 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/frontend/opcodes.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -226,7 +227,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { case Opcode::V_MAX3_F32: return V_MAX3_F32(inst); case Opcode::V_MAX3_U32: - return V_MAX3_U32(inst); + return V_MAX3_U32(false, inst); + case Opcode::V_MAX3_I32: + return V_MAX_U32(true, inst); case Opcode::V_TRUNC_F32: return V_TRUNC_F32(inst); case Opcode::V_CEIL_F32: @@ -309,6 +312,12 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MBCNT_U32_B32(true, inst); case Opcode::V_MBCNT_HI_U32_B32: return V_MBCNT_U32_B32(false, inst); + case Opcode::V_MOVRELS_B32: + return V_MOVRELS_B32(inst); + case Opcode::V_MOVRELD_B32: + return V_MOVRELD_B32(inst); + case Opcode::V_MOVRELSD_B32: + return V_MOVRELSD_B32(inst); case Opcode::V_NOP: return; @@ -824,11 +833,11 @@ void Translator::V_MAX3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } -void Translator::V_MAX3_U32(const GcnInst& inst) { +void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2))); + SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed)); } void Translator::V_CVT_I32_F32(const GcnInst& inst) { @@ -960,14 +969,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { } void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; if (!is_low) { - ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U); - return; + // v_mbcnt_hi_u32_b32 v2, -1, 0 + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 && + inst.src[1].field == OperandField::ConstZero) { + return; + } + // v_mbcnt_hi_u32_b32 vX, exec_hi, 0 + if (inst.src[0].field == OperandField::ExecHi && + inst.src[1].field == OperandField::ConstZero) { + return; + } + } else { + // v_mbcnt_lo_u32_b32 v2, -1, vX + // used combined with above to fetch lane id in non-compute stages + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) { + SetDst(inst.dst[0], ir.LaneId()); + } + // v_mbcnt_lo_u32_b32 v20, exec_lo, vX + // used combined in above for append buffer indexing. + if (inst.src[0].field == OperandField::ExecLo) { + SetDst(inst.dst[0], ir.Imm32(0)); + } } - ASSERT(src0.IsImmediate() && src0.U32() == ~0U); - SetDst(inst.dst[0], ir.LaneId()); } void Translator::V_BFM_B32(const GcnInst& inst) { @@ -990,4 +1014,52 @@ void Translator::V_FFBH_U32(const GcnInst& inst) { SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); } +// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of +// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to + +IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) { + // Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs + IR::U32 src_val = ir.GetVectorReg(IR::VectorReg::V0); + for (u32 i = src_vgprno; i < runtime_info.num_allocated_vgprs; i++) { + const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno)); + src_val = + IR::U32{ir.Select(cond, ir.GetVectorReg(IR::VectorReg::V0 + i), src_val)}; + } + return src_val; +} + +void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) { + for (u32 i = dst_vgprno; i < runtime_info.num_allocated_vgprs; i++) { + const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno)); + const IR::U32 dst_val = + IR::U32{ir.Select(cond, src_val, ir.GetVectorReg(IR::VectorReg::V0 + i))}; + ir.SetVectorReg(IR::VectorReg::V0 + i, dst_val); + } +} + +void Translator::V_MOVRELS_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); + const IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + SetDst(inst.dst[0], src_val); +} + +void Translator::V_MOVRELD_B32(const GcnInst& inst) { + const IR::U32 src_val{GetSrc(inst.src[0])}; + u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + VMovRelDHelper(dst_vgprno, src_val, m0); +} + +void Translator::V_MOVRELSD_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); + u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + VMovRelDHelper(dst_vgprno, src_val, m0); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 73530dade..5af283364 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -18,9 +18,11 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_B: case Opcode::IMAGE_SAMPLE_C_LZ_O: case Opcode::IMAGE_SAMPLE_D: + case Opcode::IMAGE_SAMPLE_CD: return IMAGE_SAMPLE(inst); - case Opcode::IMAGE_GATHER4_C: case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_C_LZ: case Opcode::IMAGE_GATHER4_LZ_O: return IMAGE_GATHER(inst); case Opcode::IMAGE_ATOMIC_ADD: @@ -98,6 +100,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_STORE(2, true, inst); case Opcode::TBUFFER_STORE_FORMAT_XYZ: return BUFFER_STORE(3, true, inst); + case Opcode::TBUFFER_STORE_FORMAT_XYZW: + return BUFFER_STORE(4, true, inst); case Opcode::BUFFER_STORE_DWORD: return BUFFER_STORE(1, false, inst); @@ -143,10 +147,6 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const auto& mimg = inst.control.mimg; - if (mimg.da) { - LOG_WARNING(Render_Vulkan, "Image instruction declares an array"); - } - IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg dest_reg{inst.dst[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; @@ -384,11 +384,11 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); const IR::VectorReg dst_reg{inst.src[1].code}; if (num_dwords == 1) { - ir.SetVectorReg(dst_reg, IR::F32{value}); + ir.SetVectorReg(dst_reg, IR::U32{value}); return; } for (u32 i = 0; i < num_dwords; i++) { - ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); + ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)}); } } @@ -452,21 +452,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::VectorReg src_reg{inst.src[1].code}; switch (num_dwords) { case 1: - value = ir.GetVectorReg(src_reg); + value = ir.GetVectorReg(src_reg); break; case 2: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), - ir.GetVectorReg(src_reg + 1)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1)); break; case 3: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), - ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2)); break; case 4: - value = ir.CompositeConstruct( - ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); break; } const IR::Value handle = @@ -514,6 +511,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vdata{inst.src[1].code}; const IR::ScalarReg srsrc{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mubuf.idxen && mubuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mubuf.idxen || mubuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); const IR::U32 soffset{GetSrc(inst.src[3])}; ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); @@ -523,7 +529,6 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { info.offset_enable.Assign(mubuf.offen); IR::Value vdata_val = ir.GetVectorReg(vdata); - const IR::U32 address = ir.GetVectorReg(vaddr); const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index cdc17304c..0184a7f63 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -37,12 +37,13 @@ struct BufferResource { u32 dword_offset; IR::Type used_types; AmdGpu::Buffer inline_cbuf; + bool is_gds_buffer{}; bool is_instance_data{}; bool is_written{}; bool IsStorage(AmdGpu::Buffer buffer) const noexcept { static constexpr size_t MaxUboSize = 65536; - return buffer.GetSize() > MaxUboSize || is_written; + return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer; } constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 1eb11469c..11ae969bc 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -147,6 +147,7 @@ public: /// Intrusively store the value of a register in the block. std::array ssa_sreg_values; + std::array ssa_sbit_values; std::array ssa_vreg_values; bool has_multiple_predecessors{false}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 473ae4f66..7e52cfb5f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -217,6 +217,10 @@ U32 IREmitter::GetVccHi() { return Inst(Opcode::GetVccHi); } +U32 IREmitter::GetM0() { + return Inst(Opcode::GetM0); +} + void IREmitter::SetScc(const U1& value) { Inst(Opcode::SetScc, value); } @@ -241,6 +245,10 @@ void IREmitter::SetVccHi(const U32& value) { Inst(Opcode::SetVccHi, value); } +void IREmitter::SetM0(const U32& value) { + Inst(Opcode::SetM0, value); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); } @@ -305,21 +313,21 @@ U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } -F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { - return Inst(Opcode::ReadConstBuffer, handle, index); +U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { + return Inst(Opcode::ReadConstBuffer, handle, index); } Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info) { switch (num_dwords) { case 1: - return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address); case 2: - return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x2, Flags{info}, handle, address); case 3: - return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x3, Flags{info}, handle, address); case 4: - return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x4, Flags{info}, handle, address); default: UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); } @@ -333,17 +341,16 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad const Value& data, BufferInstInfo info) { switch (num_dwords) { case 1: - Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32, - Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data); break; case 2: - Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x2, Flags{info}, handle, address, data); break; case 3: - Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x3, Flags{info}, handle, address, data); break; case 4: - Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x4, Flags{info}, handle, address, data); break; default: UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); @@ -402,6 +409,14 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); } +U32 IREmitter::DataAppend(const U32& counter) { + return Inst(Opcode::DataAppend, counter, Imm32(0)); +} + +U32 IREmitter::DataConsume(const U32& counter) { + return Inst(Opcode::DataConsume, counter, Imm32(0)); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index de8fe450d..01e71893c 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -67,12 +67,14 @@ public: [[nodiscard]] U1 GetVcc(); [[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccHi(); + [[nodiscard]] U32 GetM0(); void SetScc(const U1& value); void SetExec(const U1& value); void SetVcc(const U1& value); void SetSccLo(const U32& value); void SetVccLo(const U32& value); void SetVccHi(const U32& value); + void SetM0(const U32& value); [[nodiscard]] U1 Condition(IR::Condition cond); @@ -88,7 +90,7 @@ public: [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); - [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); + [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info); @@ -118,6 +120,8 @@ public: [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address, const Value& value, BufferInstInfo info); + [[nodiscard]] U32 DataAppend(const U32& counter); + [[nodiscard]] U32 DataConsume(const U32& counter); [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index d6ef49cf7..601c453d9 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -51,12 +51,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: - case Opcode::StoreBufferF32: - case Opcode::StoreBufferF32x2: - case Opcode::StoreBufferF32x3: - case Opcode::StoreBufferF32x4: - case Opcode::StoreBufferFormatF32: case Opcode::StoreBufferU32: + case Opcode::StoreBufferU32x2: + case Opcode::StoreBufferU32x3: + case Opcode::StoreBufferU32x4: + case Opcode::StoreBufferFormatF32: case Opcode::BufferAtomicIAdd32: case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicUMin32: @@ -68,6 +67,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::BufferAtomicOr32: case Opcode::BufferAtomicXor32: case Opcode::BufferAtomicSwap32: + case Opcode::DataAppend: + case Opcode::DataConsume: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 40dcfa441..4b922d55b 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -17,8 +17,7 @@ OPCODE(DiscardCond, Void, U1, // Constant memory operations OPCODE(ReadConst, U32, U32x2, U32, ) -OPCODE(ReadConstBuffer, F32, Opaque, U32, ) -OPCODE(ReadConstBufferU32, U32, Opaque, U32, ) +OPCODE(ReadConstBuffer, U32, Opaque, U32, ) // Barriers OPCODE(Barrier, Void, ) @@ -60,12 +59,14 @@ OPCODE(GetExec, U1, Void, OPCODE(GetVcc, U1, Void, ) OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccHi, U32, Void, ) +OPCODE(GetM0, U32, Void, ) OPCODE(SetScc, Void, U1, ) OPCODE(SetExec, Void, U1, ) OPCODE(SetVcc, Void, U1, ) OPCODE(SetSccLo, Void, U32, ) OPCODE(SetVccLo, Void, U32, ) OPCODE(SetVccHi, Void, U32, ) +OPCODE(SetM0, Void, U32, ) // Undefined OPCODE(UndefU1, U1, ) @@ -75,21 +76,19 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Buffer operations -OPCODE(LoadBufferF32, F32, Opaque, Opaque, ) -OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, ) -OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, ) -OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, ) -OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, ) OPCODE(LoadBufferU32, U32, Opaque, Opaque, ) -OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, ) -OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) -OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) -OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) +OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, ) +OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, ) +OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) +OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) +OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) +OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) // Buffer atomic operations -OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) @@ -99,7 +98,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) @@ -343,3 +342,5 @@ OPCODE(QuadShuffle, U32, U32, OPCODE(ReadFirstLane, U32, U32, ) OPCODE(ReadLane, U32, U32, U32 ) OPCODE(WriteLane, U32, U32, U32, U32 ) +OPCODE(DataAppend, U32, U32, U32 ) +OPCODE(DataConsume, U32, U32, U32 ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 025bb98c8..aa5d39ae8 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -3,7 +3,6 @@ #include #include -#include "common/alignment.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" @@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) { bool IsBufferStore(const IR::Inst& inst) { switch (inst.GetOpcode()) { - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: return true; default: return IsBufferAtomic(inst); @@ -55,25 +53,28 @@ bool IsBufferStore(const IR::Inst& inst) { bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { - case IR::Opcode::LoadBufferF32: - case IR::Opcode::LoadBufferF32x2: - case IR::Opcode::LoadBufferF32x3: - case IR::Opcode::LoadBufferF32x4: case IR::Opcode::LoadBufferU32: + case IR::Opcode::LoadBufferU32x2: + case IR::Opcode::LoadBufferU32x3: + case IR::Opcode::LoadBufferU32x4: case IR::Opcode::ReadConstBuffer: - case IR::Opcode::ReadConstBufferU32: return true; default: return IsBufferStore(inst); } } +bool IsDataRingInstruction(const IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::DataAppend || + inst.GetOpcode() == IR::Opcode::DataConsume; +} + bool IsTextureBufferInstruction(const IR::Inst& inst) { return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 || inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32; } -static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { +bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { switch (num_format) { case AmdGpu::NumberFormat::Float: switch (data_format) { @@ -98,19 +99,15 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { switch (inst.GetOpcode()) { - case IR::Opcode::LoadBufferF32: - case IR::Opcode::LoadBufferF32x2: - case IR::Opcode::LoadBufferF32x3: - case IR::Opcode::LoadBufferF32x4: - case IR::Opcode::ReadConstBuffer: - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - return IR::Type::F32; case IR::Opcode::LoadBufferU32: - case IR::Opcode::ReadConstBufferU32: + case IR::Opcode::LoadBufferU32x2: + case IR::Opcode::LoadBufferU32x3: + case IR::Opcode::LoadBufferU32x4: case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: + case IR::Opcode::ReadConstBuffer: case IR::Opcode::BufferAtomicIAdd32: case IR::Opcode::BufferAtomicSwap32: return IR::Type::U32; @@ -191,6 +188,10 @@ public: u32 Add(const BufferResource& desc) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { + // Only one GDS binding can exist. + if (desc.is_gds_buffer && existing.is_gds_buffer) { + return true; + } return desc.sgpr_base == existing.sgpr_base && desc.dword_offset == existing.dword_offset && desc.inline_cbuf == existing.inline_cbuf; @@ -399,8 +400,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); // Address of constant buffer reads can be calculated at IR emittion time. - if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || - inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { return; } @@ -609,6 +609,51 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors) { + // Insert gds binding in the shader if it doesn't exist already. + // The buffer is used for append/consume counters. + constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1}; + const u32 binding = descriptors.Add(BufferResource{ + .used_types = IR::Type::U32, + .inline_cbuf = GdsSharp, + .is_gds_buffer = true, + .is_written = true, + }); + + const auto pred = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; + }; + + // Attempt to deduce the GDS address of counter at compile time. + const u32 gds_addr = [&] { + const IR::Value& gds_offset = inst.Arg(0); + if (gds_offset.IsImmediate()) { + // Nothing to do, offset is known. + return gds_offset.U32() & 0xFFFF; + } + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to track M0 source"); + + // M0 must be set by some user data register. + const IR::Inst* prod = gds_offset.InstRecursive(); + const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); + u32 m0_val = info.user_data[ud_reg] >> 16; + if (prod->GetOpcode() == IR::Opcode::IAdd32) { + m0_val += prod->Arg(1).U32(); + } + return m0_val & 0xFFFF; + }(); + + // Patch instruction. + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(gds_addr >> 2)); + inst.SetArg(1, ir.Imm32(binding)); +} + void ResourceTrackingPass(IR::Program& program) { // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; @@ -625,6 +670,10 @@ void ResourceTrackingPass(IR::Program& program) { } if (IsImageInstruction(inst)) { PatchImageInstruction(*block, inst, info, descriptors); + continue; + } + if (IsDataRingInstruction(inst)) { + PatchDataRingInstruction(*block, inst, info, descriptors); } } } diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 9edb157db..54dce0355 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -33,6 +33,7 @@ struct ExecFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {}; struct VccLoTag : FlagTag {}; struct VccHiTag : FlagTag {}; +struct M0Tag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -43,8 +44,17 @@ struct GotoVariable : FlagTag { u32 index; }; -using Variant = std::variant; +struct ThreadBitScalar : FlagTag { + ThreadBitScalar() = default; + explicit ThreadBitScalar(IR::ScalarReg sgpr_) : sgpr{sgpr_} {} + + auto operator<=>(const ThreadBitScalar&) const noexcept = default; + + IR::ScalarReg sgpr; +}; + +using Variant = std::variant; using ValueMap = std::unordered_map; struct DefTable { @@ -69,6 +79,13 @@ struct DefTable { goto_vars[variable.index].insert_or_assign(block, value); } + const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) { + return block->ssa_sreg_values[RegIndex(variable.sgpr)]; + } + void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) { + block->ssa_sreg_values[RegIndex(variable.sgpr)] = value; + } + const IR::Value& Def(IR::Block* block, SccFlagTag) { return scc_flag[block]; } @@ -103,6 +120,12 @@ struct DefTable { void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) { vcc_flag.insert_or_assign(block, value); } + const IR::Value& Def(IR::Block* block, M0Tag) { + return m0_flag[block]; + } + void SetDef(IR::Block* block, M0Tag, const IR::Value& value) { + m0_flag.insert_or_assign(block, value); + } std::unordered_map goto_vars; ValueMap scc_flag; @@ -111,6 +134,7 @@ struct DefTable { ValueMap scc_lo_flag; ValueMap vcc_lo_flag; ValueMap vcc_hi_flag; + ValueMap m0_flag; }; IR::Opcode UndefOpcode(IR::ScalarReg) noexcept { @@ -129,6 +153,10 @@ IR::Opcode UndefOpcode(const VccHiTag) noexcept { return IR::Opcode::UndefU32; } +IR::Opcode UndefOpcode(const M0Tag) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(const FlagTag) noexcept { return IR::Opcode::UndefU1; } @@ -161,7 +189,7 @@ public: } template - IR::Value ReadVariable(Type variable, IR::Block* root_block, bool is_thread_bit = false) { + IR::Value ReadVariable(Type variable, IR::Block* root_block) { boost::container::small_vector, 64> stack{ ReadState(nullptr), ReadState(root_block), @@ -189,7 +217,7 @@ public: } else if (!block->IsSsaSealed()) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; @@ -202,7 +230,7 @@ public: } else { // Break potential cycles with operandless phi IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); WriteVariable(variable, block, IR::Value{phi}); @@ -251,9 +279,7 @@ private: template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmPredecessors()) { - const bool is_thread_bit = - std::is_same_v && phi.Flags() == IR::Type::U1; - phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred, is_thread_bit)); + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } @@ -301,7 +327,11 @@ private: void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { const IR::Opcode opcode{inst.GetOpcode()}; switch (opcode) { - case IR::Opcode::SetThreadBitScalarReg: + case IR::Opcode::SetThreadBitScalarReg: { + const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; + pass.WriteVariable(ThreadBitScalar{reg}, block, inst.Arg(1)); + break; + } case IR::Opcode::SetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; pass.WriteVariable(reg, block, inst.Arg(1)); @@ -330,11 +360,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetVccHi: pass.WriteVariable(VccHiTag{}, block, inst.Arg(0)); break; - case IR::Opcode::GetThreadBitScalarReg: + case IR::Opcode::SetM0: + pass.WriteVariable(M0Tag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetThreadBitScalarReg: { + const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; + const IR::Value value = pass.ReadVariable(ThreadBitScalar{reg}, block); + inst.ReplaceUsesWith(value); + break; + } case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; - const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg; - const IR::Value value = pass.ReadVariable(reg, block, thread_bit); + const IR::Value value = pass.ReadVariable(reg, block); inst.ReplaceUsesWith(value); break; } @@ -362,6 +399,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetVccHi: inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block)); break; + case IR::Opcode::GetM0: + inst.ReplaceUsesWith(pass.ReadVariable(M0Tag{}, block)); + break; default: break; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 776fd90a6..1bb065544 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -107,6 +107,7 @@ struct RuntimeInfo { Stage stage; u32 num_user_data; u32 num_input_vgprs; + u32 num_allocated_vgprs; VertexRuntimeInfo vs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index cee30f755..cbc18aa43 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -465,6 +465,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); event_eos->SignalFence(); + if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) { + ASSERT(event_eos->size == 1); + if (rasterizer) { + rasterizer->Finish(); + const u32 value = rasterizer->ReadDataFromGds(event_eos->gds_index); + *event_eos->Address() = value; + } + } break; } case PM4ItOpcode::EventWriteEop: { @@ -474,6 +482,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { + rasterizer->InlineDataToGds(dma_data->dst_addr_lo, dma_data->data); + } break; } case PM4ItOpcode::WriteData: { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 58ade221b..fd7980c17 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -350,6 +350,17 @@ struct PM4CmdEventWriteEop { } }; +enum class DmaDataDst : u32 { + Memory = 0, + Gds = 1, +}; + +enum class DmaDataSrc : u32 { + Memory = 0, + Gds = 1, + Data = 2, +}; + struct PM4DmaData { PM4Type3Header header; union { @@ -357,11 +368,11 @@ struct PM4DmaData { BitField<12, 1, u32> src_atc; BitField<13, 2, u32> src_cache_policy; BitField<15, 1, u32> src_volatile; - BitField<20, 2, u32> dst_sel; + BitField<20, 2, DmaDataDst> dst_sel; BitField<24, 1, u32> dst_atc; BitField<25, 2, u32> dst_cache_policy; BitField<27, 1, u32> dst_volatile; - BitField<29, 2, u32> src_sel; + BitField<29, 2, DmaDataSrc> src_sel; BitField<31, 1, u32> cp_sync; }; union { @@ -502,13 +513,17 @@ struct PM4CmdEventWriteEos { } void SignalFence() const { - switch (command.Value()) { + const auto cmd = command.Value(); + switch (cmd) { case Command::SingalFence: { *Address() = DataDWord(); break; } + case Command::GdsStore: { + break; + } default: { - UNREACHABLE(); + UNREACHABLE_MSG("Unknown command {}", u32(cmd)); } } } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 89032e990..86af05bf1 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -15,8 +15,9 @@ namespace VideoCore { static constexpr size_t NumVertexBuffers = 32; -static constexpr size_t StagingBufferSize = 512_MB; -static constexpr size_t UboStreamBufferSize = 64_MB; +static constexpr size_t GdsBufferSize = 64_KB; +static constexpr size_t StagingBufferSize = 1_GB; +static constexpr size_t UboStreamBufferSize = 128_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, @@ -25,7 +26,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s texture_cache{texture_cache_}, tracker{tracker_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, + gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize}, memory_tracker{&tracker} { + Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); + // Ensure the first slot is used for the null buffer void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1)); } @@ -232,6 +236,27 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { return regs.num_indices; } +void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) { + ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned"); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = gds_buffer.Handle(), + .offset = gds_offset, + .size = sizeof(u32), + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &buf_barrier, + }); + cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value); +} + std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer) { static constexpr u64 StreamThreshold = CACHING_PAGESIZE; @@ -258,6 +283,7 @@ std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) if (buffer_id) { Buffer& buffer = slot_buffers[buffer_id]; if (buffer.IsInBounds(gpu_addr, size)) { + SynchronizeBuffer(buffer, gpu_addr, size, false); return {&buffer, buffer.Offset(gpu_addr)}; } } @@ -541,64 +567,48 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { - boost::container::small_vector image_ids; - const u32 inv_size = std::min(size, MaxInvalidateDist); - texture_cache.ForEachImageInRegion(device_addr, inv_size, [&](ImageId image_id, Image& image) { - // Only consider GPU modified images, i.e render targets or storage images. - // Also avoid any CPU modified images as the image data is likely to be stale. - if (True(image.flags & ImageFlagBits::CpuModified) || - False(image.flags & ImageFlagBits::GpuModified)) { - return; - } - // Image must fully overlap with the provided buffer range. - if (image.cpu_addr < device_addr || image.cpu_addr_end > device_addr + size) { - return; - } - image_ids.push_back(image_id); - }); - if (image_ids.empty()) { + static constexpr FindFlags find_flags = + FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = device_addr; + info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(info, find_flags); + if (!image_id) { return false; } - // Sort images by modification tick. If there are overlaps we want to - // copy from least to most recently modified. - std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) { - const Image& lhs = texture_cache.GetImage(lhs_id); - const Image& rhs = texture_cache.GetImage(rhs_id); - return lhs.tick_accessed_last < rhs.tick_accessed_last; - }); - boost::container::small_vector copies; - for (const ImageId image_id : image_ids) { - copies.clear(); - Image& image = texture_cache.GetImage(image_id); - u32 offset = buffer.Offset(image.cpu_addr); - const u32 num_layers = image.info.resources.layers; - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = std::max(image.info.size.width >> m, 1u); - const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = - image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - copies.push_back({ - .bufferOffset = offset, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), - .imageSubresource{ - .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, depth}, - }); - offset += mip_ofs * num_layers; - } - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); + Image& image = texture_cache.GetImage(image_id); + if (image.info.guest_size_bytes > size) { + return false; } + boost::container::small_vector copies; + u32 offset = buffer.Offset(image.cpu_addr); + const u32 num_layers = image.info.resources.layers; + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + copies.push_back({ + .bufferOffset = offset, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + offset += mip_ofs * num_layers; + } + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b38b00f07..cd6ea28fc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -57,6 +57,11 @@ public: PageManager& tracker); ~BufferCache(); + /// Returns a pointer to GDS device local buffer. + [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept { + return &gds_buffer; + } + /// Invalidates any buffer in the logical page range. void InvalidateMemory(VAddr device_addr, u64 size); @@ -66,6 +71,9 @@ public: /// Bind host index buffer for the current draw. u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + /// Writes a value to GDS buffer. + void InlineDataToGds(u32 gds_offset, u32 value); + /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, bool is_texel_buffer = false); @@ -130,6 +138,7 @@ private: PageManager& tracker; StreamBuffer staging_buffer; StreamBuffer stream_buffer; + Buffer gds_buffer; std::mutex mutex; Common::SlotVector slot_buffers; MemoryTracker memory_tracker; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 40a1124a6..a97c3dee9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -585,11 +585,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu vk::Format AdjustColorBufferFormat(vk::Format base_format, Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) { - ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard || - comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate, - "Unsupported component swap mode {}", static_cast(comp_swap)); - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; + const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; + const bool comp_swap_alt_reverse = + comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; if (comp_swap_alt) { switch (base_format) { case vk::Format::eR8G8B8A8Unorm: @@ -605,6 +604,18 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, default: break; } + } else if (comp_swap_reverse) { + switch (base_format) { + case vk::Format::eR8G8B8A8Unorm: + return vk::Format::eA8B8G8R8UnormPack32; + case vk::Format::eR8G8B8A8Srgb: + return is_vo_surface ? vk::Format::eA8B8G8R8UnormPack32 + : vk::Format::eA8B8G8R8SrgbPack32; + default: + break; + } + } else if (comp_swap_alt_reverse) { + return base_format; } else { if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) { return vk::Format::eR8G8B8A8Unorm; @@ -649,8 +660,8 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { *out_data++ = i; *out_data++ = i + 1; *out_data++ = i + 2; - *out_data++ = i + 2; *out_data++ = i; + *out_data++ = i + 2; *out_data++ = i + 3; } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index b87d3c915..aeae08138 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -109,37 +109,42 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, u32 binding{}; for (const auto& desc : info->buffers) { - const auto vsharp = desc.GetSharp(*info); - const bool is_storage = desc.IsStorage(vsharp); - const VAddr address = vsharp.base_address; - // Most of the time when a metadata is updated with a shader it gets cleared. It means we - // can skip the whole dispatch and update the tracked state instead. Also, it is not - // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will - // need its full emulation anyways. For cases of metadata read a warning will be logged. - if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { - LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return false; - } + bool is_storage = true; + if (desc.is_gds_buffer) { + auto* vk_buffer = buffer_cache.GetGdsBuffer(); + buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes()); } else { - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + const auto vsharp = desc.GetSharp(*info); + is_storage = desc.IsStorage(vsharp); + const VAddr address = vsharp.base_address; + // Most of the time when a metadata is updated with a shader it gets cleared. It means + // we can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we + // will need its full emulation anyways. For cases of metadata read a warning will be + // logged. + if (desc.is_written) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return false; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } } + const u32 size = vsharp.GetSize(); + const u32 alignment = + is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(address, size, desc.is_written); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + if (adjust != 0) { + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding, adjust); + } + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); } - const u32 size = vsharp.GetSize(); - if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); - } - const u32 alignment = - is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding, adjust); - } - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, @@ -188,7 +193,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.MarkWritten(address, size); } } set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6ac4dcf14..a548b70a4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -432,7 +432,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.MarkWritten(address, size); } } set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b5435af1f..b4b256bb0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { case Shader::Stage::Vertex: { info.num_user_data = regs.vs_program.settings.num_user_regs; info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4; GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && @@ -84,6 +85,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } case Shader::Stage::Fragment: { info.num_user_data = regs.ps_program.settings.num_user_regs; + info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4; std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), [](Liverpool::ColorBuffer::SwapMode mode) { return static_cast(mode); @@ -102,6 +104,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { case Shader::Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; + info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), @@ -295,6 +298,16 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } + static bool TessMissingLogged = false; + if (auto* pgm = regs.ProgramForStage(3); + regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) { + if (!TessMissingLogged) { + LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped"); + TessMissingLogged = true; + } + return false; + } + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } return true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9f72d0448..6344315a5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -175,6 +175,10 @@ u64 Rasterizer::Flush() { return current_tick; } +void Rasterizer::Finish() { + scheduler.Finish(); +} + void Rasterizer::BeginRendering() { const auto& regs = liverpool->regs; RenderState state; @@ -251,6 +255,17 @@ void Rasterizer::BeginRendering() { scheduler.BeginRendering(state); } +void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) { + buffer_cache.InlineDataToGds(gds_offset, value); +} + +u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { + auto* gds_buf = buffer_cache.GetGdsBuffer(); + u32 value; + std::memcpy(&value, gds_buf->mapped_data.data() + gds_offset, sizeof(u32)); + return value; +} + void Rasterizer::InvalidateMemory(VAddr addr, u64 size) { buffer_cache.InvalidateMemory(addr, size); texture_cache.InvalidateMemory(addr, size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 43ab4756d..5aa90c5cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -41,12 +41,15 @@ public: void ScopeMarkerEnd(); void ScopedMarkerInsert(const std::string_view& str); + void InlineDataToGds(u32 gds_offset, u32 value); + u32 ReadDataFromGds(u32 gsd_offset); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size); void CpSync(); u64 Flush(); + void Finish(); private: void BeginRendering(); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index f932b25a0..1bbb975ba 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,7 +32,6 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered - Deleted = 1 << 9, ///< Indicates that images was marked for deletion once frame is done }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 66fde5c83..0b0f4278d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -204,8 +204,8 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target - if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { - if (pixel_format == vk::Format::eR32Sfloat) { + if (force_depth) { + if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { pixel_format = vk::Format::eD32SfloatS8Uint; } else if (pixel_format == vk::Format::eR16Unorm) { pixel_format = vk::Format::eD16UnormS8Uint; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index e554bad7e..bb2d90530 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -128,6 +128,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) { + format = image.info.pixel_format; + aspect = vk::ImageAspectFlagBits::eStencil; + } const vk::ImageViewCreateInfo image_view_ci = { .pNext = usage_override ? &usage_ci : nullptr, diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 996fcad04..37bb5da14 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -40,17 +40,27 @@ TextureCache::~TextureCache() = default; void TextureCache::InvalidateMemory(VAddr address, size_t size) { std::scoped_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { - const size_t image_dist = - image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr; - if (image_dist < MaxInvalidateDist) { - // Ensure image is reuploaded when accessed again. - image.flags |= ImageFlagBits::CpuModified; - } + // Ensure image is reuploaded when accessed again. + image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. UntrackImage(image_id); }); } +void TextureCache::MarkWritten(VAddr address, size_t max_size) { + static constexpr FindFlags find_flags = + FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = address; + info.guest_size_bytes = max_size; + const ImageId image_id = FindImage(info, find_flags); + if (!image_id) { + return; + } + // Ensure image is copied when accessed again. + slot_images[image_id].flags |= ImageFlagBits::CpuModified; +} + void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { std::scoped_lock lk{mutex}; @@ -199,10 +209,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { continue; } - ASSERT(cache_image.info.type == info.type); + ASSERT(cache_image.info.type == info.type || True(flags & FindFlags::RelaxFmt)); image_id = cache_id; } + if (True(flags & FindFlags::NoCreate) && !image_id) { + return {}; + } + // Try to resolve overlaps (if any) if (!image_id) { for (const auto& cache_id : image_ids) { @@ -211,10 +225,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { } } - if (True(flags & FindFlags::NoCreate) && !image_id) { - return {}; - } - // Create and register a new image if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); @@ -251,9 +261,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); Image& image = slot_images[image_id]; - if (view_info.is_storage) { - image.flags |= ImageFlagBits::GpuModified; - } UpdateImage(image_id); auto& usage = image.info.usage; @@ -351,7 +358,6 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule if (False(image.flags & ImageFlagBits::CpuModified)) { return; } - // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; @@ -485,8 +491,6 @@ void TextureCache::DeleteImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - image.flags |= ImageFlagBits::Deleted; - // Remove any registered meta areas. const auto& meta_info = image.info.meta_info; if (meta_info.cmask_addr) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44bc2b431..cc19ac4a8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,6 +50,9 @@ public: /// Invalidates any image in the logical page range. void InvalidateMemory(VAddr address, size_t size); + /// Marks an image as dirty if it exists at the provided address. + void MarkWritten(VAddr address, size_t max_size); + /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size);