Merge branch 'main' into m4aac

This commit is contained in:
georgemoralis 2025-06-09 23:11:38 +03:00 committed by GitHub
commit dd7c1cbd86
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
75 changed files with 1754 additions and 1172 deletions

View File

@ -76,18 +76,13 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with: with:
append-timestamp: false append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Setup VS Environment
uses: ilammy/msvc-dev-cmd@v1.13.0
with:
arch: amd64
- name: Configure CMake - name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@ -111,7 +106,7 @@ jobs:
- name: Setup Qt - name: Setup Qt
uses: jurplel/install-qt-action@v4 uses: jurplel/install-qt-action@v4
with: with:
version: 6.9.0 version: 6.9.1
host: windows host: windows
target: desktop target: desktop
arch: win64_msvc2022_64 arch: win64_msvc2022_64
@ -130,18 +125,13 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build cache-name: ${{ runner.os }}-qt-cache-cmake-build
with: with:
append-timestamp: false append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Setup VS Environment
uses: ilammy/msvc-dev-cmd@v1.13.0
with:
arch: amd64
- name: Configure CMake - name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@ -186,7 +176,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{runner.os}}-sdl-cache-cmake-build cache-name: ${{runner.os}}-sdl-cache-cmake-build
with: with:
@ -228,7 +218,7 @@ jobs:
- name: Setup Qt - name: Setup Qt
uses: jurplel/install-qt-action@v4 uses: jurplel/install-qt-action@v4
with: with:
version: 6.9.0 version: 6.9.1
host: mac host: mac
target: desktop target: desktop
arch: clang_64 arch: clang_64
@ -247,7 +237,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{runner.os}}-qt-cache-cmake-build cache-name: ${{runner.os}}-qt-cache-cmake-build
with: with:
@ -301,7 +291,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with: with:
@ -362,7 +352,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build cache-name: ${{ runner.os }}-qt-cache-cmake-build
with: with:
@ -409,7 +399,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
with: with:
@ -445,7 +435,7 @@ jobs:
${{ env.cache-name }}- ${{ env.cache-name }}-
- name: Cache CMake Build - name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.17 uses: hendrikmuhs/ccache-action@v1.2.18
env: env:
cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
with: with:
@ -494,7 +484,7 @@ jobs:
with: with:
token: ${{ secrets.SHADPS4_TOKEN_REPO }} token: ${{ secrets.SHADPS4_TOKEN_REPO }}
name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}"
draft: false draft: false
prerelease: true prerelease: true
body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})" body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"
@ -530,14 +520,14 @@ jobs:
# Check if release already exists and get ID # Check if release already exists and get ID
release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
"https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" | jq -r '.id') "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}" | jq -r '.id')
if [[ "$release_id" == "null" ]]; then if [[ "$release_id" == "null" ]]; then
echo "Creating release in $REPO for $filename" echo "Creating release in $REPO for $filename"
release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \ release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
-H "Accept: application/vnd.github.v3+json" \ -H "Accept: application/vnd.github.v3+json" \
-d '{ -d '{
"tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}", "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}",
"name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}", "name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
"draft": false, "draft": false,
"prerelease": true, "prerelease": true,

View File

@ -966,6 +966,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.cpp
src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/tile_manager.h
src/video_core/texture_cache/types.h src/video_core/texture_cache/types.h
src/video_core/texture_cache/host_compatibility.cpp
src/video_core/texture_cache/host_compatibility.h src/video_core/texture_cache/host_compatibility.h
src/video_core/page_manager.cpp src/video_core/page_manager.cpp
src/video_core/page_manager.h src/video_core/page_manager.h

View File

@ -88,7 +88,8 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
dst_op.reg.value <= ZYDIS_REGISTER_R15; dst_op.reg.value <= ZYDIS_REGISTER_R15;
} }
static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* operands,
Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]); const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
#if defined(_WIN32) #if defined(_WIN32)
@ -126,7 +127,8 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
return !cpu.has(Cpu::tSSE4a); return !cpu.has(Cpu::tSSE4a);
} }
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operands,
Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@ -245,7 +247,8 @@ static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
} }
} }
static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* operands,
Xbyak::CodeGenerator& c) {
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@ -383,8 +386,44 @@ static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGene
} }
} }
static void ReplaceMOVNT(void* address, u8 rep_prefix) {
// Find the opcode byte
// There can be any amount of prefixes but the instruction can't be more than 15 bytes
// And we know for sure this is a MOVNTSS/MOVNTSD
bool found = false;
bool rep_prefix_found = false;
int index = 0;
u8* ptr = reinterpret_cast<u8*>(address);
for (int i = 0; i < 15; i++) {
if (ptr[i] == rep_prefix) {
rep_prefix_found = true;
} else if (ptr[i] == 0x2B) {
index = i;
found = true;
break;
}
}
// Some sanity checks
ASSERT(found);
ASSERT(index >= 2);
ASSERT(ptr[index - 1] == 0x0F);
ASSERT(rep_prefix_found);
// This turns the MOVNTSS/MOVNTSD to a MOVSS/MOVSD m, xmm
ptr[index] = 0x11;
}
static void ReplaceMOVNTSS(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
ReplaceMOVNT(address, 0xF3);
}
static void ReplaceMOVNTSD(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
ReplaceMOVNT(address, 0xF2);
}
using PatchFilter = bool (*)(const ZydisDecodedOperand*); using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&); using InstructionGenerator = void (*)(void*, const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo { struct PatchInfo {
/// Filter for more granular patch conditions past just the instruction mnemonic. /// Filter for more granular patch conditions past just the instruction mnemonic.
PatchFilter filter; PatchFilter filter;
@ -400,6 +439,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
// SSE4a // SSE4a
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}}, {ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}}, {ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
{ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}},
{ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}},
#if defined(_WIN32) #if defined(_WIN32)
// Windows needs a trampoline. // Windows needs a trampoline.
@ -477,7 +518,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
auto& trampoline_gen = module->trampoline_gen; auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr(); const auto trampoline_ptr = trampoline_gen.getCurr();
patch_info.generator(operands, trampoline_gen); patch_info.generator(code, operands, trampoline_gen);
// Return to the following instruction at the end of the trampoline. // Return to the following instruction at the end of the trampoline.
trampoline_gen.jmp(code + instruction.length); trampoline_gen.jmp(code + instruction.length);
@ -485,7 +526,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
// Replace instruction with near jump to the trampoline. // Replace instruction with near jump to the trampoline.
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR); patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
} else { } else {
patch_info.generator(operands, patch_gen); patch_info.generator(code, operands, patch_gen);
} }
const auto patch_size = patch_gen.getCurr() - code; const auto patch_size = patch_gen.getCurr() - code;

View File

@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ... Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1, LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ... Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
} }
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View File

@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
return ret; return ret;
} }
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
int flags, const char* name) { const char* name) {
LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
fmt::ptr(*addr_in_out), len, prot, flags, name);
if (len == 0 || !Common::Is16KBAligned(len)) { if (len == 0 || !Common::Is16KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple"); LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
return ORBIS_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out); const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot); const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags); const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
SCOPE_EXIT {
LOG_INFO(Kernel_Vmm,
"in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
};
auto* memory = Core::Memory::Instance(); auto* memory = Core::Memory::Instance();
return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags, const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name); Core::VMAType::Flexible, name);
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
return ret;
} }
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
int flags) {
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
} }
@ -663,6 +660,9 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
"PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id, "PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
address, size); address, size);
auto* memory = Core::Memory::Instance();
memory->SetPrtArea(id, address, size);
PrtApertures[id] = {address, size}; PrtApertures[id] = {address, size};
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
size_t infoSize); size_t infoSize);
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment); s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot, s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
int flags, const char* name); const char* name);
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
int flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot); int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot); s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);

View File

@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
// Posix // Posix
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init); LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock); LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy); LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);

View File

@ -282,7 +282,12 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) { s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
LOG_INFO(Lib_VideoOut, "called"); LOG_INFO(Lib_VideoOut, "called");
*status = driver->GetPort(handle)->resolution; auto* port = driver->GetPort(handle);
if (!port || !port->is_open) {
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}
*status = port->resolution;
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -51,7 +51,7 @@ void ZlibTaskThread(const std::stop_token& stop) {
if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) { if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
break; break;
} }
task = task_queue.back(); task = task_queue.front();
task_queue.pop(); task_queue.pop();
} }
@ -136,7 +136,7 @@ s32 PS4_SYSV_ABI sceZlibWaitForDone(u64* request_id, const u32* timeout) {
} else { } else {
done_queue_cv.wait(lock, pred); done_queue_cv.wait(lock, pred);
} }
*request_id = done_queue.back(); *request_id = done_queue.front();
done_queue.pop(); done_queue.pop();
} }
return ORBIS_OK; return ORBIS_OK;

View File

@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
sr.type = sym_type; sr.type = sym_type;
const auto* record = m_hle_symbols.FindSymbol(sr); const auto* record = m_hle_symbols.FindSymbol(sr);
if (!record) {
// Check if it an export function
const auto* p = FindExportedModule(*module, *library);
if (p && p->export_sym.GetSize() > 0) {
record = p->export_sym.FindSymbol(sr);
}
}
if (record) { if (record) {
*return_info = *record; *return_info = *record;
Core::Devtools::Widget::ModuleList::AddModule(sr.library); Core::Devtools::Widget::ModuleList::AddModule(sr.library);
return true; return true;
} }
// Check if it an export function
const auto* p = FindExportedModule(*module, *library);
if (p && p->export_sym.GetSize() > 0) {
record = p->export_sym.FindSymbol(sr);
if (record) {
*return_info = *record;
return true;
}
}
const auto aeronid = AeroLib::FindByNid(sr.name.c_str()); const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
if (aeronid) { if (aeronid) {
return_info->name = aeronid->name; return_info->name = aeronid->name;

View File

@ -95,6 +95,46 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
return clamped_size; return clamped_size;
} }
void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
PrtArea& area = prt_areas[id];
if (area.mapped) {
rasterizer->UnmapMemory(area.start, area.end - area.start);
}
area.start = address;
area.end = address + size;
area.mapped = true;
// Pretend the entire PRT area is mapped to avoid GPU tracking errors.
// The caches will use CopySparseMemory to fetch data which avoids unmapped areas.
rasterizer->MapMemory(address, size);
}
void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
const bool is_sparse = std::ranges::any_of(
prt_areas, [&](const PrtArea& area) { return area.Overlaps(virtual_addr, size); });
if (!is_sparse) {
std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), size);
return;
}
auto vma = FindVMA(virtual_addr);
ASSERT_MSG(vma->second.Contains(virtual_addr, 0),
"Attempted to access invalid GPU address {:#x}", virtual_addr);
while (size) {
u64 copy_size = std::min<u64>(vma->second.size - (virtual_addr - vma->first), size);
if (vma->second.IsFree()) {
std::memset(dest, 0, copy_size);
} else {
std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), copy_size);
}
size -= copy_size;
virtual_addr += copy_size;
dest += copy_size;
++vma;
}
}
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
const VAddr virtual_addr = std::bit_cast<VAddr>(address); const VAddr virtual_addr = std::bit_cast<VAddr>(address);
const auto& vma = FindVMA(virtual_addr)->second; const auto& vma = FindVMA(virtual_addr)->second;
@ -182,7 +222,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto& area = CarveDmemArea(mapping_start, size)->second; auto& area = CarveDmemArea(mapping_start, size)->second;
area.memory_type = memory_type; area.memory_type = memory_type;
area.is_free = false; area.is_free = false;
MergeAdjacent(dmem_map, dmem_area);
return mapping_start; return mapping_start;
} }

View File

@ -172,6 +172,10 @@ public:
u64 ClampRangeSize(VAddr virtual_addr, u64 size); u64 ClampRangeSize(VAddr virtual_addr, u64 size);
void SetPrtArea(u32 id, VAddr address, u64 size);
void CopySparseMemory(VAddr source, u8* dest, u64 size);
bool TryWriteBacking(void* address, const void* data, u32 num_bytes); bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2); void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
@ -275,6 +279,18 @@ private:
size_t pool_budget{}; size_t pool_budget{};
Vulkan::Rasterizer* rasterizer{}; Vulkan::Rasterizer* rasterizer{};
struct PrtArea {
VAddr start;
VAddr end;
bool mapped;
bool Overlaps(VAddr test_address, u64 test_size) const {
const VAddr overlap_end = test_address + test_size;
return start < overlap_end && test_address < end;
}
};
std::array<PrtArea, 3> prt_areas{};
friend class ::Core::Devtools::Widget::MemoryMapViewer; friend class ::Core::Devtools::Widget::MemoryMapViewer;
}; };

View File

@ -5,6 +5,7 @@
#include <set> #include <set>
#include "common/singleton.h" #include "common/singleton.h"
#include "common/types.h"
namespace Core { namespace Core {

View File

@ -137,7 +137,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
} }
} }
latestRev = latestVersion.right(7); latestRev = latestVersion.right(40);
latestDate = jsonObj["published_at"].toString(); latestDate = jsonObj["published_at"].toString();
QJsonArray assets = jsonObj["assets"].toArray(); QJsonArray assets = jsonObj["assets"].toArray();
@ -167,7 +167,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate); QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date"; latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";
if (latestRev == currentRev.left(7)) { if (latestRev == currentRev) {
if (showMessage) { if (showMessage) {
QMessageBox::information(this, tr("Auto Updater"), QMessageBox::information(this, tr("Auto Updater"),
tr("Your version is already up to date!")); tr("Your version is already up to date!"));
@ -215,7 +215,7 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate,
"<td>%3</td>" "<td>%3</td>"
"<td>(%4)</td>" "<td>(%4)</td>"
"</tr></table></p>") "</tr></table></p>")
.arg(currentRev.left(7), currentDate, latestRev, latestDate); .arg(currentRev.left(7), currentDate, latestRev.left(7), latestDate);
QLabel* updateLabel = new QLabel(updateText, this); QLabel* updateLabel = new QLabel(updateText, this);
layout->addWidget(updateLabel); layout->addWidget(updateLabel);

View File

@ -2048,7 +2048,7 @@
</message> </message>
<message> <message>
<source> * Unsupported Vulkan Version</source> <source> * Unsupported Vulkan Version</source>
<translation type="unfinished"> * Unsupported Vulkan Version</translation> <translation> * Versão do Vulkan não suportada</translation>
</message> </message>
</context> </context>
<context> <context>

View File

@ -138,7 +138,7 @@
</message> </message>
<message> <message>
<source>File Exists</source> <source>File Exists</source>
<translation>Dosya mevcut</translation> <translation>Dosya Mevcut</translation>
</message> </message>
<message> <message>
<source>File already exists. Do you want to replace it?</source> <source>File already exists. Do you want to replace it?</source>
@ -1221,7 +1221,7 @@
</message> </message>
<message> <message>
<source>Exit shadPS4</source> <source>Exit shadPS4</source>
<translation>shadPS4&apos;ten Çık</translation> <translation>shadPS4 Çıkış</translation>
</message> </message>
<message> <message>
<source>Exit the application.</source> <source>Exit the application.</source>
@ -1381,7 +1381,7 @@
</message> </message>
<message> <message>
<source>Game Boot</source> <source>Game Boot</source>
<translation>Oyun Başlatma</translation> <translation>Oyun Başlat</translation>
</message> </message>
<message> <message>
<source>Only one file can be selected!</source> <source>Only one file can be selected!</source>

View File

@ -303,6 +303,11 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses); ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
ctx.AddExtension("SPV_KHR_physical_storage_buffer"); ctx.AddExtension("SPV_KHR_physical_storage_buffer");
} }
if (info.uses_shared && profile.supports_workgroup_explicit_memory_layout) {
ctx.AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
}
} }
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {

View File

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
@ -15,42 +17,40 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)}; const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
const Id pointer{
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
});
}
Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(3U)};
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
const Id pointer{
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
});
} }
Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset, Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)}; const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
const Id pointer{
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics); return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
} return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
});
Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a conditional branch to make sure that
// the atomic is not mistakenly executed when the index is out of bounds.
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
const Id ib_label = ctx.OpLabel();
const Id oob_label = ctx.OpLabel();
const Id end_label = ctx.OpLabel();
ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
ctx.AddLabel(ib_label);
const Id ib_result = emit_func();
ctx.OpBranch(end_label);
ctx.AddLabel(oob_label);
const Id oob_result = ctx.u32_zero_value;
ctx.OpBranch(end_label);
ctx.AddLabel(end_label);
return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
}
// Bounds checking not enabled, just perform the atomic operation.
return emit_func();
} }
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
@ -63,11 +63,42 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index); const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] { return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
}); });
} }
Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
});
}
Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
});
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
@ -89,6 +120,10 @@ Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
} }
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
} }
@ -133,6 +168,10 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
} }
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
} }
@ -175,6 +214,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
} }
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value) {
return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
&Sirit::Module::OpAtomicCompareExchange);
}
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
} }

View File

@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
template <u32 bit_size>
auto AccessBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
Id zero_value{};
Id result_type{};
if constexpr (bit_size == 64) {
zero_value = ctx.u64_zero_value;
result_type = ctx.U64;
} else if constexpr (bit_size == 32) {
zero_value = ctx.u32_zero_value;
result_type = ctx.U32[1];
} else if constexpr (bit_size == 16) {
zero_value = ctx.u16_zero_value;
result_type = ctx.U16;
} else {
static_assert(false, "type not supported");
}
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a conditional branch to make sure that
// the atomic is not mistakenly executed when the index is out of bounds.
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
const Id ib_label = ctx.OpLabel();
const Id end_label = ctx.OpLabel();
ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
ctx.OpBranchConditional(in_bounds, ib_label, end_label);
const auto last_label = ctx.last_label;
ctx.AddLabel(ib_label);
const auto ib_result = emit_func();
ctx.OpBranch(end_label);
ctx.AddLabel(end_label);
if (Sirit::ValidId(ib_result)) {
return ctx.OpPhi(result_type, ib_result, ib_label, zero_value, last_label);
} else {
return Id{0};
}
}
// Bounds checking not enabled, just perform the atomic operation.
return emit_func();
}
} // namespace Shader::Backend::SPIRV

View File

@ -86,6 +86,7 @@ void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@ -96,6 +97,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@ -118,11 +121,14 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx);
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitLoadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitLoadSharedU64(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
@ -372,6 +378,7 @@ Id EmitBitCount64(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value); Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value); Id EmitFindUMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb64(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value); Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitFindILsb64(EmitContext& ctx, Id value); Id EmitFindILsb64(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitSMin32(EmitContext& ctx, Id a, Id b);

View File

@ -229,6 +229,20 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
return ctx.OpFindUMsb(ctx.U32[1], value); return ctx.OpFindUMsb(ctx.U32[1], value);
} }
Id EmitFindUMsb64(EmitContext& ctx, Id value) {
// Vulkan restricts some bitwise operations to 32-bit only, so decompose into
// two 32-bit values and select the correct result.
const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
const Id hi_msb{ctx.OpFindUMsb(ctx.U32[1], hi)};
const Id lo_msb{ctx.OpFindUMsb(ctx.U32[1], lo)};
const Id found_hi{ctx.OpINotEqual(ctx.U1[1], hi_msb, ctx.ConstU32(u32(-1)))};
const Id shifted_hi{ctx.OpIAdd(ctx.U32[1], hi_msb, ctx.ConstU32(32u))};
// value == 0 case is checked in IREmitter
return ctx.OpSelect(ctx.U32[1], found_hi, shifted_hi, lo_msb);
}
Id EmitFindILsb32(EmitContext& ctx, Id value) { Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value); return ctx.OpFindILsb(ctx.U32[1], value);
} }

View File

@ -1,43 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(1U)};
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U16, pointer);
});
}
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)}; const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
return ctx.OpLoad(ctx.U32[1], pointer);
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U32[1], pointer);
});
} }
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)}; const Id shift_id{ctx.ConstU32(3U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), const Id pointer{
ctx.OpLoad(ctx.U32[1], rhs_pointer)); ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U64, pointer);
});
}
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(1U)};
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
ctx.OpStore(pointer, value);
return Id{0};
});
} }
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)}; const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
ctx.OpStore(pointer, value);
AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
ctx.OpStore(pointer, value);
return Id{0};
});
} }
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)}; const Id shift{ctx.ConstU32(3U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))}; const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); const Id pointer{
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
ctx.OpStore(pointer, value);
return Id{0};
});
} }
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -146,6 +146,7 @@ void EmitContext::DefineArithmeticTypes() {
false_value = ConstantFalse(U1[1]); false_value = ConstantFalse(U1[1]);
u8_one_value = Constant(U8, 1U); u8_one_value = Constant(U8, 1U);
u8_zero_value = Constant(U8, 0U); u8_zero_value = Constant(U8, 0U);
u16_zero_value = Constant(U16, 0U);
u32_one_value = ConstU32(1U); u32_one_value = ConstU32(1U);
u32_zero_value = ConstU32(0U); u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f); f32_zero_value = ConstF32(0.0f);
@ -285,6 +286,8 @@ void EmitContext::DefineBufferProperties() {
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding)); Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U)); buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding)); Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
} }
} }
} }
@ -307,7 +310,9 @@ void EmitContext::DefineInterpolatedAttribs() {
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)}; const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
const Id p10{OpFSub(F32[4], p1, p0)}; const Id p10{OpFSub(F32[4], p1, p0)};
const Id p20{OpFSub(F32[4], p2, p0)}; const Id p20{OpFSub(F32[4], p2, p0)};
const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)}; const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
? bary_coord_linear_id
: bary_coord_persp_id)};
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)}; const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)}; const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)}; const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
@ -411,8 +416,14 @@ void EmitContext::DefineInputs() {
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
} }
if (profile.needs_manual_interpolation) { if (profile.needs_manual_interpolation) {
gl_bary_coord_id = if (info.has_perspective_interp) {
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); bary_coord_persp_id =
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
}
if (info.has_linear_interp) {
bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
spv::StorageClass::Input);
}
} }
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i]; const auto& input = runtime_info.fs_info.inputs[i];
@ -435,9 +446,12 @@ void EmitContext::DefineInputs() {
} else { } else {
attr_id = DefineInput(type, semantic); attr_id = DefineInput(type, semantic);
Name(attr_id, fmt::format("fs_in_attr{}", semantic)); Name(attr_id, fmt::format("fs_in_attr{}", semantic));
}
if (input.is_flat) { if (input.is_flat) {
Decorate(attr_id, spv::Decoration::Flat); Decorate(attr_id, spv::Decoration::Flat);
} else if (IsLinear(info.interp_qualifiers[i])) {
Decorate(attr_id, spv::Decoration::NoPerspective);
}
} }
input_params[semantic] = input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
@ -634,7 +648,8 @@ void EmitContext::DefineOutputs() {
} }
break; break;
} }
case LogicalStage::Fragment: case LogicalStage::Fragment: {
u32 num_render_targets = 0;
for (u32 i = 0; i < IR::NumRenderTargets; i++) { for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) { if (!info.stores.GetAny(mrt)) {
@ -643,11 +658,21 @@ void EmitContext::DefineOutputs() {
const u32 num_components = info.stores.NumComponents(mrt); const u32 num_components = info.stores.NumComponents(mrt);
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format}; const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
const Id type{GetAttributeType(*this, num_format)[num_components]}; const Id type{GetAttributeType(*this, num_format)[num_components]};
const Id id{DefineOutput(type, i)}; Id id;
if (runtime_info.fs_info.dual_source_blending) {
id = DefineOutput(type, 0);
Decorate(id, spv::Decoration::Index, i);
} else {
id = DefineOutput(type, i);
}
Name(id, fmt::format("frag_color{}", i)); Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
++num_render_targets;
} }
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
"Dual source blending enabled, there must be exactly two MRT exports");
break; break;
}
case LogicalStage::Geometry: { case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
@ -957,13 +982,27 @@ void EmitContext::DefineSharedMemory() {
} }
ASSERT(info.stage == Stage::Compute); ASSERT(info.stage == Stage::Compute);
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))}; const auto make_type = [&](Id element_type, u32 element_size) {
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); Decorate(array_type, spv::Decoration::ArrayStride, element_size);
Name(shared_memory_u32, "shared_mem");
interfaces.push_back(shared_memory_u32); const Id struct_type{TypeStruct(array_type)};
MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
Decorate(struct_type, spv::Decoration::Block);
const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
Decorate(variable, spv::Decoration::Aliased);
interfaces.push_back(variable);
return std::make_tuple(variable, element_pointer, pointer);
};
std::tie(shared_memory_u16, shared_u16, shared_memory_u16_type) = make_type(U16, 2u);
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make_type(U32[1], 4u);
std::tie(shared_memory_u64, shared_u64, shared_memory_u64_type) = make_type(U64, 8u);
} }
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) { Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {

View File

@ -235,17 +235,16 @@ public:
Id false_value{}; Id false_value{};
Id u8_one_value{}; Id u8_one_value{};
Id u8_zero_value{}; Id u8_zero_value{};
Id u16_zero_value{};
Id u32_one_value{}; Id u32_one_value{};
Id u32_zero_value{}; Id u32_zero_value{};
Id f32_zero_value{}; Id f32_zero_value{};
Id u64_one_value{}; Id u64_one_value{};
Id u64_zero_value{}; Id u64_zero_value{};
Id shared_u8{};
Id shared_u16{}; Id shared_u16{};
Id shared_u32{}; Id shared_u32{};
Id shared_u32x2{}; Id shared_u64{};
Id shared_u32x4{};
Id input_u32{}; Id input_u32{};
Id input_f32{}; Id input_f32{};
@ -285,16 +284,16 @@ public:
Id image_u32{}; Id image_u32{};
Id image_f32{}; Id image_f32{};
Id shared_memory_u8{};
Id shared_memory_u16{}; Id shared_memory_u16{};
Id shared_memory_u32{}; Id shared_memory_u32{};
Id shared_memory_u32x2{}; Id shared_memory_u64{};
Id shared_memory_u32x4{};
Id shared_memory_u16_type{};
Id shared_memory_u32_type{}; Id shared_memory_u32_type{};
Id shared_memory_u64_type{};
Id interpolate_func{}; Id bary_coord_persp_id{};
Id gl_bary_coord_id{}; Id bary_coord_linear_id{};
struct TextureDefinition { struct TextureDefinition {
const VectorIds* data_types; const VectorIds* data_types;
@ -320,6 +319,7 @@ public:
Id size; Id size;
Id size_shorts; Id size_shorts;
Id size_dwords; Id size_dwords;
Id size_qwords;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases; std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](PointerType alias) const { const BufferSpv& operator[](PointerType alias) const {

View File

@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
if (last_attr != IR::Attribute::Position0) { if (last_attr != IR::Attribute::Position0) {
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1; data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
const auto it = data.attr_map.begin();
const u32 comp_stride = std::next(it)->first - it->first;
data.output_vertices = comp_stride / 64;
} }
return data; return data;

View File

@ -3,8 +3,8 @@
#pragma once #pragma once
#include <map>
#include <span> #include <span>
#include <unordered_map>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
@ -12,8 +12,9 @@
namespace Shader { namespace Shader {
struct CopyShaderData { struct CopyShaderData {
std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map; std::map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
u32 num_attrs{0}; u32 num_attrs{0};
u32 output_vertices{0};
}; };
CopyShaderData ParseCopyShader(std::span<const u32> code); CopyShaderData ParseCopyShader(std::span<const u32> code);

View File

@ -605,11 +605,12 @@ public:
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
runtime_info{runtime_info_}, profile{profile_} { runtime_info{runtime_info_}, profile{profile_},
translator{info_, runtime_info_, profile_} {
Visit(root_stmt, nullptr, nullptr); Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block}; IR::Block* first_block = syntax_list.front().data.block;
Translator{&first_block, info, runtime_info, profile}.EmitPrologue(); translator.EmitPrologue(first_block);
} }
private: private:
@ -637,8 +638,8 @@ private:
current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1; current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
const u32 start = stmt.block->begin_index; const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1; const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), translator.Translate(current_block, stmt.block->begin,
info, runtime_info, profile); inst_list.subspan(start, size));
} }
break; break;
} }
@ -820,6 +821,7 @@ private:
Info& info; Info& info;
const RuntimeInfo& runtime_info; const RuntimeInfo& runtime_info;
const Profile& profile; const Profile& profile;
Translator translator;
}; };
} // Anonymous namespace } // Anonymous namespace

View File

@ -13,6 +13,8 @@ void Translator::EmitDataShare(const GcnInst& inst) {
// DS // DS
case Opcode::DS_ADD_U32: case Opcode::DS_ADD_U32:
return DS_ADD_U32(inst, false); return DS_ADD_U32(inst, false);
case Opcode::DS_ADD_U64:
return DS_ADD_U64(inst, false);
case Opcode::DS_SUB_U32: case Opcode::DS_SUB_U32:
return DS_SUB_U32(inst, false); return DS_SUB_U32(inst, false);
case Opcode::DS_INC_U32: case Opcode::DS_INC_U32:
@ -61,10 +63,14 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_READ(32, false, true, false, inst); return DS_READ(32, false, true, false, inst);
case Opcode::DS_READ2ST64_B32: case Opcode::DS_READ2ST64_B32:
return DS_READ(32, false, true, true, inst); return DS_READ(32, false, true, true, inst);
case Opcode::DS_READ_U16:
return DS_READ(16, false, false, false, inst);
case Opcode::DS_CONSUME: case Opcode::DS_CONSUME:
return DS_CONSUME(inst); return DS_CONSUME(inst);
case Opcode::DS_APPEND: case Opcode::DS_APPEND:
return DS_APPEND(inst); return DS_APPEND(inst);
case Opcode::DS_WRITE_B16:
return DS_WRITE(16, false, false, false, inst);
case Opcode::DS_WRITE_B64: case Opcode::DS_WRITE_B64:
return DS_WRITE(64, false, false, false, inst); return DS_WRITE(64, false, false, false, inst);
case Opcode::DS_WRITE2_B64: case Opcode::DS_WRITE2_B64:
@ -123,6 +129,18 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
} }
} }
void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U64 data{GetSrc64(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
if (rtn) {
SetDst64(inst.dst[0], IR::U64{original_val});
}
}
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])}; const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 data{GetSrc(inst.src[1])};
@ -201,23 +219,28 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
if (bit_size == 32) { if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data0), addr0); ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
} else { } else {
ir.WriteShared( ir.WriteShared(64,
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
addr0); ir.GetVectorReg(data0 + 1))),
addr0);
} }
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
if (bit_size == 32) { if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data1), addr1); ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else { } else {
ir.WriteShared( ir.WriteShared(64,
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)), ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
addr1); ir.GetVectorReg(data1 + 1))),
addr1);
} }
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::Value data = const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, data, addr0); ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
} else if (bit_size == 16) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
} else { } else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
@ -289,22 +312,29 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
if (bit_size == 32) { if (bit_size == 32) {
ir.SetVectorReg(dst_reg++, IR::U32{data0}); ir.SetVectorReg(dst_reg++, IR::U32{data0});
} else { } else {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)}); const auto vector = ir.UnpackUint2x32(IR::U64{data0});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
} }
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
if (bit_size == 32) { if (bit_size == 32) {
ir.SetVectorReg(dst_reg++, IR::U32{data1}); ir.SetVectorReg(dst_reg++, IR::U32{data1});
} else { } else {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)}); const auto vector = ir.UnpackUint2x32(IR::U64{data1});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
} }
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); const auto vector = ir.UnpackUint2x32(IR::U64{data});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
} else if (bit_size == 16) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)};
ir.SetVectorReg(dst_reg, ir.UConvert(32, data));
} else { } else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};

View File

@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
} }
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) { void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
const u32 color_buffer_idx = u32 color_buffer_idx =
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0); static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
color_buffer_idx = 0;
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
AmdGpu::NumberFormat num_format; AmdGpu::NumberFormat num_format;
@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
} }
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) { void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
const u32 color_buffer_idx = u32 color_buffer_idx =
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0); static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
color_buffer_idx = 0;
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp); const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);

View File

@ -114,6 +114,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_FF1_I32_B64(inst); return S_FF1_I32_B64(inst);
case Opcode::S_FLBIT_I32_B32: case Opcode::S_FLBIT_I32_B32:
return S_FLBIT_I32_B32(inst); return S_FLBIT_I32_B32(inst);
case Opcode::S_FLBIT_I32_B64:
return S_FLBIT_I32_B64(inst);
case Opcode::S_BITSET0_B32: case Opcode::S_BITSET0_B32:
return S_BITSET_B32(inst, 0); return S_BITSET_B32(inst, 0);
case Opcode::S_BITSET1_B32: case Opcode::S_BITSET1_B32:
@ -686,6 +688,17 @@ void Translator::S_FLBIT_I32_B32(const GcnInst& inst) {
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
} }
void Translator::S_FLBIT_I32_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
// Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB)
// position
const IR::U32 msb_pos = ir.FindUMsb(src0);
const IR::U32 pos_from_left = ir.ISub(ir.Imm32(63), msb_pos);
// Select 0xFFFFFFFF if src0 was 0
const IR::U1 cond = ir.INotEqual(src0, ir.Imm64(u64(0u)));
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
}
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) { void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
const IR::U32 old_value{GetSrc(inst.dst[0])}; const IR::U32 old_value{GetSrc(inst.dst[0])};
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))}; const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};

View File

@ -21,16 +21,60 @@
namespace Shader::Gcn { namespace Shader::Gcn {
static u32 next_vgpr_num; Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
static std::unordered_map<u32, IR::VectorReg> vgpr_map; : info{info_}, runtime_info{runtime_info_}, profile{profile_},
next_vgpr_num{runtime_info.num_allocated_vgprs} {
Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_, if (info.l_stage == LogicalStage::Fragment) {
const Profile& profile_) dst_frag_vreg = GatherInterpQualifiers();
: ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} { }
next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
} }
void Translator::EmitPrologue() { IR::VectorReg Translator::GatherInterpQualifiers() {
u32 dst_vreg{};
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
info.has_perspective_interp = true;
}
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
info.has_perspective_interp = true;
}
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
info.has_perspective_interp = true;
}
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
++dst_vreg; // I/W
++dst_vreg; // J/W
++dst_vreg; // 1/W
}
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
info.has_linear_interp = true;
}
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
info.has_linear_interp = true;
}
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
info.has_linear_interp = true;
}
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
++dst_vreg;
}
return IR::VectorReg(dst_vreg);
}
void Translator::EmitPrologue(IR::Block* first_block) {
ir = IR::IREmitter(*first_block, first_block->begin());
ir.Prologue(); ir.Prologue();
ir.SetExec(ir.Imm1(true)); ir.SetExec(ir.Imm1(true));
@ -60,39 +104,7 @@ void Translator::EmitPrologue() {
} }
break; break;
case LogicalStage::Fragment: case LogicalStage::Fragment:
dst_vreg = IR::VectorReg::V0; dst_vreg = dst_frag_vreg;
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
++dst_vreg; // I/W
++dst_vreg; // J/W
++dst_vreg; // 1/W
}
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
++dst_vreg;
}
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
if (runtime_info.fs_info.en_flags.pos_x_float_ena) { if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
@ -543,6 +555,26 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
info.translation_failed = true; info.translation_failed = true;
} }
void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list) {
if (inst_list.empty()) {
return;
}
ir = IR::IREmitter{*block, block->begin()};
for (const auto& inst : inst_list) {
pc += inst.length;
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
info.stage == Stage::Local);
EmitFetch(inst);
continue;
}
TranslateInstruction(inst, pc);
}
}
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
// Emit instructions for each category. // Emit instructions for each category.
switch (inst.category) { switch (inst.category) {
@ -577,25 +609,4 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
} }
} }
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info, runtime_info, profile};
for (const auto& inst : inst_list) {
pc += inst.length;
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
info.stage == Stage::Local);
translator.EmitFetch(inst);
continue;
}
translator.TranslateInstruction(inst, pc);
}
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <span> #include <span>
#include <unordered_map>
#include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
@ -53,15 +54,17 @@ enum class NegateMode : u32 {
Result, Result,
}; };
static constexpr size_t MaxInterpVgpr = 16;
class Translator { class Translator {
public: public:
explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);
const Profile& profile);
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list);
void TranslateInstruction(const GcnInst& inst, u32 pc); void TranslateInstruction(const GcnInst& inst, u32 pc);
// Instruction categories // Instruction categories
void EmitPrologue(); void EmitPrologue(IR::Block* first_block);
void EmitFetch(const GcnInst& inst); void EmitFetch(const GcnInst& inst);
void EmitExport(const GcnInst& inst); void EmitExport(const GcnInst& inst);
void EmitFlowControl(u32 pc, const GcnInst& inst); void EmitFlowControl(u32 pc, const GcnInst& inst);
@ -121,6 +124,7 @@ public:
void S_FF1_I32_B32(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst); void S_FF1_I32_B64(const GcnInst& inst);
void S_FLBIT_I32_B32(const GcnInst& inst); void S_FLBIT_I32_B32(const GcnInst& inst);
void S_FLBIT_I32_B64(const GcnInst& inst);
void S_BITSET_B32(const GcnInst& inst, u32 bit_value); void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
@ -204,6 +208,7 @@ public:
void V_EXP_F32(const GcnInst& inst); void V_EXP_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst); void V_LOG_F32(const GcnInst& inst);
void V_RCP_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst);
void V_RCP_LEGACY_F32(const GcnInst& inst);
void V_RCP_F64(const GcnInst& inst); void V_RCP_F64(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst); void V_SQRT_F32(const GcnInst& inst);
@ -266,6 +271,7 @@ public:
// Data share // Data share
// DS // DS
void DS_ADD_U32(const GcnInst& inst, bool rtn); void DS_ADD_U32(const GcnInst& inst, bool rtn);
void DS_ADD_U64(const GcnInst& inst, bool rtn);
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn); void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
@ -324,16 +330,18 @@ private:
void LogMissingOpcode(const GcnInst& inst); void LogMissingOpcode(const GcnInst& inst);
IR::VectorReg GetScratchVgpr(u32 offset); IR::VectorReg GetScratchVgpr(u32 offset);
IR::VectorReg GatherInterpQualifiers();
private: private:
IR::IREmitter ir; IR::IREmitter ir;
Info& info; Info& info;
const RuntimeInfo& runtime_info; const RuntimeInfo& runtime_info;
const Profile& profile; const Profile& profile;
u32 next_vgpr_num;
std::unordered_map<u32, IR::VectorReg> vgpr_map;
std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{};
IR::VectorReg dst_frag_vreg{};
bool opcode_missing = false; bool opcode_missing = false;
}; };
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -158,6 +158,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_LOG_F32(inst); return V_LOG_F32(inst);
case Opcode::V_RCP_F32: case Opcode::V_RCP_F32:
return V_RCP_F32(inst); return V_RCP_F32(inst);
case Opcode::V_RCP_LEGACY_F32:
return V_RCP_LEGACY_F32(inst);
case Opcode::V_RCP_F64: case Opcode::V_RCP_F64:
return V_RCP_F64(inst); return V_RCP_F64(inst);
case Opcode::V_RCP_IFLAG_F32: case Opcode::V_RCP_IFLAG_F32:
@ -798,6 +800,20 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRecip(src0)); SetDst(inst.dst[0], ir.FPRecip(src0));
} }
void Translator::V_RCP_LEGACY_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const auto result = ir.FPRecip(src0);
const auto inf = ir.FPIsInf(result);
const auto raw_result = ir.ConvertFToU(32, result);
const auto sign_bit = ir.ShiftRightLogical(raw_result, ir.Imm32(31u));
const auto sign_bit_set = ir.INotEqual(sign_bit, ir.Imm32(0u));
const IR::F32 inf_result{ir.Select(sign_bit_set, ir.Imm32(-0.0f), ir.Imm32(0.0f))};
const IR::F32 val{ir.Select(inf, inf_result, result)};
SetDst(inst.dst[0], val);
}
void Translator::V_RCP_F64(const GcnInst& inst) { void Translator::V_RCP_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])}; const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst64(inst.dst[0], ir.FPRecip(src0)); SetDst64(inst.dst[0], ir.FPRecip(src0));

View File

@ -22,13 +22,14 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
// VINTRP // VINTRP
void Translator::V_INTERP_P2_F32(const GcnInst& inst) { void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
info.interp_qualifiers[attr.param_index] = vgpr_to_interp[inst.src[0].code];
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
} }
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
} }

View File

@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::Add, inst); return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP: case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst); return BUFFER_ATOMIC(AtomicOp::Swap, inst);
case Opcode::BUFFER_ATOMIC_CMPSWAP:
return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
case Opcode::BUFFER_ATOMIC_SMIN: case Opcode::BUFFER_ATOMIC_SMIN:
return BUFFER_ATOMIC(AtomicOp::Smin, inst); return BUFFER_ATOMIC(AtomicOp::Smin, inst);
case Opcode::BUFFER_ATOMIC_UMIN: case Opcode::BUFFER_ATOMIC_UMIN:
@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
switch (op) { switch (op) {
case AtomicOp::Swap: case AtomicOp::Swap:
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info); return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
case AtomicOp::CmpSwap: {
const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
}
case AtomicOp::Add: case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info); return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
case AtomicOp::Smin: case AtomicOp::Smin:

View File

@ -193,6 +193,8 @@ struct Info {
PersistentSrtInfo srt_info; PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf; std::vector<u32> flattened_ud_buf;
std::array<IR::Interpolation, 32> interp_qualifiers{};
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1; s32 tess_consts_dword_offset = -1;
@ -206,6 +208,8 @@ struct Info {
bool has_discard{}; bool has_discard{};
bool has_image_gather{}; bool has_image_gather{};
bool has_image_query{}; bool has_image_query{};
bool has_perspective_interp{};
bool has_linear_interp{};
bool uses_atomic_float_min_max{}; bool uses_atomic_float_min_max{};
bool uses_lane_id{}; bool uses_lane_id{};
bool uses_group_quad{}; bool uses_group_quad{};

View File

@ -83,6 +83,16 @@ enum class Attribute : u64 {
Max, Max,
}; };
enum class Interpolation {
Invalid = 0,
PerspectiveSample = 1,
PerspectiveCenter = 2,
PerspectiveCentroid = 3,
LinearSample = 4,
LinearCenter = 5,
LinearCentroid = 6,
};
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max); constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8; constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32; constexpr size_t NumParams = 32;
@ -104,6 +114,15 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
} }
constexpr bool IsLinear(Interpolation interp) noexcept {
return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid;
}
constexpr bool IsPerspective(Interpolation interp) noexcept {
return interp >= Interpolation::PerspectiveSample &&
interp <= Interpolation::PerspectiveCentroid;
}
[[nodiscard]] std::string NameOf(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute);
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) { [[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <array> #include <array>
#include <bit>
#include <source_location> #include <source_location>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
@ -294,10 +293,12 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) { switch (bit_size) {
case 16:
return Inst<U16>(Opcode::LoadSharedU16, offset);
case 32: case 32:
return Inst<U32>(Opcode::LoadSharedU32, offset); return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64: case 64:
return Inst(Opcode::LoadSharedU64, offset); return Inst<U64>(Opcode::LoadSharedU64, offset);
default: default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size); UNREACHABLE_MSG("Invalid bit size {}", bit_size);
} }
@ -305,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
switch (bit_size) { switch (bit_size) {
case 16:
Inst(Opcode::WriteSharedU16, offset, value);
break;
case 32: case 32:
Inst(Opcode::WriteSharedU32, offset, value); Inst(Opcode::WriteSharedU32, offset, value);
break; break;
@ -316,10 +320,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
} }
} }
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) { U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
switch (data.Type()) { switch (data.Type()) {
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data); return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
case Type::U64:
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
default: default:
ThrowInvalidType(data.Type()); ThrowInvalidType(data.Type());
} }
@ -513,6 +519,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value); return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
} }
Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
const Value& cmp_value, BufferInstInfo info) {
return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
}
U32 IREmitter::DataAppend(const U32& counter) { U32 IREmitter::DataAppend(const U32& counter) {
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0)); return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
} }
@ -1546,8 +1557,15 @@ U32 IREmitter::FindSMsb(const U32& value) {
return Inst<U32>(Opcode::FindSMsb32, value); return Inst<U32>(Opcode::FindSMsb32, value);
} }
U32 IREmitter::FindUMsb(const U32& value) { U32 IREmitter::FindUMsb(const U32U64& value) {
return Inst<U32>(Opcode::FindUMsb32, value); switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::FindUMsb32, value);
case Type::U64:
return Inst<U32>(Opcode::FindUMsb64, value);
default:
ThrowInvalidType(value.Type());
}
} }
U32 IREmitter::FindILsb(const U32U64& value) { U32 IREmitter::FindILsb(const U32U64& value) {

View File

@ -6,7 +6,6 @@
#include <cstring> #include <cstring>
#include <type_traits> #include <type_traits>
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h" #include "shader_recompiler/ir/condition.h"
@ -17,6 +16,7 @@ namespace Shader::IR {
class IREmitter { class IREmitter {
public: public:
explicit IREmitter() = default;
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
explicit IREmitter(Block& block_, Block::iterator insertion_point_) explicit IREmitter(Block& block_, Block::iterator insertion_point_)
: block{&block_}, insertion_point{insertion_point_} {} : block{&block_}, insertion_point{insertion_point_} {}
@ -99,7 +99,7 @@ public:
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
@ -150,6 +150,9 @@ public:
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
const Value& value, const Value& cmp_value,
BufferInstInfo info);
[[nodiscard]] U32 DataAppend(const U32& counter); [[nodiscard]] U32 DataAppend(const U32& counter);
[[nodiscard]] U32 DataConsume(const U32& counter); [[nodiscard]] U32 DataConsume(const U32& counter);
@ -266,7 +269,7 @@ public:
[[nodiscard]] U32 BitwiseNot(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value);
[[nodiscard]] U32 FindSMsb(const U32& value); [[nodiscard]] U32 FindSMsb(const U32& value);
[[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 FindUMsb(const U32U64& value);
[[nodiscard]] U32 FindILsb(const U32U64& value); [[nodiscard]] U32 FindILsb(const U32U64& value);
[[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 SMin(const U32& a, const U32& b);
[[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b);

View File

@ -30,13 +30,16 @@ OPCODE(EmitVertex, Void,
OPCODE(EmitPrimitive, Void, ) OPCODE(EmitPrimitive, Void, )
// Shared memory operations // Shared memory operations
OPCODE(LoadSharedU16, U16, U32, )
OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, ) OPCODE(LoadSharedU64, U64, U32, )
OPCODE(WriteSharedU16, Void, U32, U16, )
OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU64, Void, U32, U64, )
// Shared atomic operations // Shared atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, ) OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, ) OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, ) OPCODE(SharedAtomicSMax32, U32, U32, U32, )
@ -116,6 +119,7 @@ OPCODE(StoreBufferFormatF32, Void, Opaq
// Buffer atomic operations // Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@ -126,6 +130,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )
// Vector utility // Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
@ -349,6 +354,7 @@ OPCODE(BitwiseNot32, U32, U32,
OPCODE(FindSMsb32, U32, U32, ) OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, ) OPCODE(FindUMsb32, U32, U32, )
OPCODE(FindUMsb64, U32, U64, )
OPCODE(FindILsb32, U32, U32, ) OPCODE(FindILsb32, U32, U32, )
OPCODE(FindILsb64, U32, U64, ) OPCODE(FindILsb64, U32, U64, )
OPCODE(SMin32, U32, U32, U32, ) OPCODE(SMin32, U32, U32, U32, )

View File

@ -10,6 +10,8 @@
#include "common/io_file.h" #include "common/io_file.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/path_util.h" #include "common/path_util.h"
#include "common/signal_context.h"
#include "core/signals.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/opcodes.h"
@ -24,6 +26,7 @@
using namespace Xbyak::util; using namespace Xbyak::util;
static Xbyak::CodeGenerator g_srt_codegen(32_MB); static Xbyak::CodeGenerator g_srt_codegen(32_MB);
static const u8* g_srt_codegen_start = nullptr;
namespace { namespace {
@ -54,6 +57,57 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
#endif #endif
} }
static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
// Only handle if the fault address is within the SRT code range
const u8* code_start = g_srt_codegen_start;
const u8* code_end = code_start + g_srt_codegen.getSize();
const void* code = Common::GetRip(context);
if (code < code_start || code >= code_end) {
return false; // Not in SRT code range
}
// Patch instruction to zero register
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(instruction, operands,
const_cast<void*>(code), 15);
ASSERT(ZYAN_SUCCESS(status) && instruction.mnemonic == ZYDIS_MNEMONIC_MOV &&
operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY);
size_t len = instruction.length;
const size_t patch_size = 3;
u8* code_patch = const_cast<u8*>(reinterpret_cast<const u8*>(code));
// We can only encounter rdi or r10d as the first operand in a
// fault memory access for SRT walker.
switch (operands[0].reg.value) {
case ZYDIS_REGISTER_RDI:
// mov rdi, [rdi + (off_dw << 2)] -> xor rdi, rdi
code_patch[0] = 0x48;
code_patch[1] = 0x31;
code_patch[2] = 0xFF;
break;
case ZYDIS_REGISTER_R10D:
// mov r10d, [rdi + (off_dw << 2)] -> xor r10d, r10d
code_patch[0] = 0x45;
code_patch[1] = 0x31;
code_patch[2] = 0xD2;
break;
default:
UNREACHABLE_MSG("Unsupported register for SRT walker patch");
return false;
}
// Fill nops
memset(code_patch + patch_size, 0x90, len - patch_size);
LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);
return true;
}
using namespace Shader; using namespace Shader;
struct PassInfo { struct PassInfo {
@ -141,6 +195,15 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
return; return;
} }
// Register the signal handler for SRT walker, if not already registered
if (g_srt_codegen_start == nullptr) {
g_srt_codegen_start = c.getCurr();
auto* signals = Core::Signals::Instance();
// Call after the memory invalidation handler
constexpr u32 priority = 1;
signals->RegisterAccessViolationHandler(SrtWalkerSignalHandler, priority);
}
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>(); info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
pass_info.dst_off_dw = NumUserDataRegs; pass_info.dst_off_dw = NumUserDataRegs;

View File

@ -15,7 +15,7 @@ struct FormatInfo {
AmdGpu::NumberFormat num_format; AmdGpu::NumberFormat num_format;
AmdGpu::CompMapping swizzle; AmdGpu::CompMapping swizzle;
AmdGpu::NumberConversion num_conversion; AmdGpu::NumberConversion num_conversion;
int num_components; u32 num_components;
}; };
static bool IsBufferFormatLoad(const IR::Inst& inst) { static bool IsBufferFormatLoad(const IR::Inst& inst) {

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_map>
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
namespace Shader::Optimization { namespace Shader::Optimization {

View File

@ -39,11 +39,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
ASSERT(addr->Arg(1).IsImmediate()); ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32(); offset = addr->Arg(1).U32();
} }
IR::Value data = inst.Arg(1).Resolve(); IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
: inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) { for (s32 i = 0; i < num_components; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16); const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4; const auto comp = (offset / 4) % 4;
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data}; const IR::U32 value =
IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp); ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4; offset += 4;
} }
@ -91,6 +93,19 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
const auto& gs_info = runtime_info.gs_info; const auto& gs_info = runtime_info.gs_info;
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy); info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
u32 output_vertices = gs_info.output_vertices;
if (info.gs_copy_data.output_vertices &&
info.gs_copy_data.output_vertices != output_vertices) {
ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices &&
gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG,
"Invalid geometry shader vertex configuration scenario = {}, max_vert_out = "
"{}, output_vertices = {}",
u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices);
LOG_WARNING(Render_Vulkan, "MAX_VERT_OUT {} is larger than actual output vertices {}",
output_vertices, info.gs_copy_data.output_vertices);
output_vertices = info.gs_copy_data.output_vertices;
}
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) { ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode(); const auto opcode = inst.GetOpcode();
switch (opcode) { switch (opcode) {
@ -122,7 +137,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value(); const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)}); const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
const auto comp_ofs = gs_info.output_vertices * 4u; const auto comp_ofs = output_vertices * 4u;
const auto output_size = comp_ofs * gs_info.out_vertex_data_size; const auto output_size = comp_ofs * gs_info.out_vertex_data_size;
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u; const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;

View File

@ -34,8 +34,10 @@ void Visit(Info& info, const IR::Inst& inst) {
info.uses_patches |= 1U << IR::GenericPatchIndex(patch); info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
break; break;
} }
case IR::Opcode::LoadSharedU16:
case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64: case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU16:
case IR::Opcode::WriteSharedU32: case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64: case IR::Opcode::WriteSharedU64:
info.uses_shared = true; info.uses_shared = true;

View File

@ -16,6 +16,7 @@ static bool IsSharedAccess(const IR::Inst& inst) {
case IR::Opcode::WriteSharedU64: case IR::Opcode::WriteSharedU64:
case IR::Opcode::SharedAtomicAnd32: case IR::Opcode::SharedAtomicAnd32:
case IR::Opcode::SharedAtomicIAdd32: case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64:
case IR::Opcode::SharedAtomicOr32: case IR::Opcode::SharedAtomicOr32:
case IR::Opcode::SharedAtomicSMax32: case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32: case IR::Opcode::SharedAtomicUMax32:
@ -33,9 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
if (program.info.stage != Stage::Compute) { if (program.info.stage != Stage::Compute) {
return; return;
} }
// Only perform the transform if the host shared memory is insufficient. // Only perform the transform if the host shared memory is insufficient
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size <= profile.max_shared_memory_size) { if (shared_memory_size <= profile.max_shared_memory_size &&
profile.supports_workgroup_explicit_memory_layout) {
return; return;
} }
// Add buffer binding for shared memory storage buffer. // Add buffer binding for shared memory storage buffer.
@ -60,6 +63,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {})); ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
continue; continue;
case IR::Opcode::SharedAtomicIAdd32: case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64:
inst.ReplaceUsesWithAndRemove( inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {})); ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
continue; continue;
@ -93,12 +97,19 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
ir.Imm32(shared_memory_size)); ir.Imm32(shared_memory_size));
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset); const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU16:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));
break;
case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU32:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {})); inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
break; break;
case IR::Opcode::LoadSharedU64: case IR::Opcode::LoadSharedU64:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {})); inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
break; break;
case IR::Opcode::WriteSharedU16:
ir.StoreBufferU16(handle, address, IR::U32{inst.Arg(1)}, {});
inst.Invalidate();
break;
case IR::Opcode::WriteSharedU32: case IR::Opcode::WriteSharedU32:
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {}); ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
inst.Invalidate(); inst.Invalidate();

View File

@ -7,7 +7,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/enum.h" #include "common/enum.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/amdgpu/types.h" #include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR { namespace Shader::IR {

View File

@ -23,13 +23,13 @@ struct Profile {
bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_preserve{};
bool support_fp32_denorm_flush{}; bool support_fp32_denorm_flush{};
bool support_fp32_round_to_zero{}; bool support_fp32_round_to_zero{};
bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{}; bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{}; bool supports_image_load_store_lod{};
bool supports_native_cube_calc{}; bool supports_native_cube_calc{};
bool supports_trinary_minmax{}; bool supports_trinary_minmax{};
bool supports_robust_buffer_access{}; bool supports_robust_buffer_access{};
bool supports_image_fp32_atomic_min_max{}; bool supports_image_fp32_atomic_min_max{};
bool supports_workgroup_explicit_memory_layout{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};

View File

@ -149,6 +149,7 @@ struct GeometryRuntimeInfo {
u32 out_vertex_data_size{}; u32 out_vertex_data_size{};
AmdGpu::PrimitiveType in_primitive; AmdGpu::PrimitiveType in_primitive;
GsOutputPrimTypes out_primitive; GsOutputPrimTypes out_primitive;
AmdGpu::Liverpool::GsMode::Mode mode;
std::span<const u32> vs_copy; std::span<const u32> vs_copy;
u64 vs_copy_hash; u64 vs_copy_hash;
@ -196,11 +197,13 @@ struct FragmentRuntimeInfo {
u32 num_inputs; u32 num_inputs;
std::array<PsInput, 32> inputs; std::array<PsInput, 32> inputs;
std::array<PsColorBuffer, MaxColorBuffers> color_buffers; std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
bool dual_source_blending;
bool operator==(const FragmentRuntimeInfo& other) const noexcept { bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) && return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw && en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
num_inputs == other.num_inputs && num_inputs == other.num_inputs &&
dual_source_blending == other.dual_source_blending &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
other.inputs.begin() + num_inputs); other.inputs.begin() + num_inputs);
} }

View File

@ -228,9 +228,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const u32 type = header->type; const u32 type = header->type;
switch (type) { switch (type) {
default:
UNREACHABLE_MSG("Wrong PM4 type {}", type);
break;
case 0: case 0:
case 1: UNREACHABLE_MSG("Unimplemented PM4 type 0, base reg: {}, size: {}",
UNREACHABLE_MSG("Unsupported PM4 type {}", type); header->type0.base.Value(), header->type0.NumWords());
break; break;
case 2: case 2:
// Type-2 packet are used for padding purposes // Type-2 packet are used for padding purposes
@ -394,7 +397,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
break; break;
} }
case PM4ItOpcode::SetPredication: { case PM4ItOpcode::SetPredication: {
LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION"); LOG_WARNING(Render, "Unimplemented IT_SET_PREDICATION");
break; break;
} }
case PM4ItOpcode::IndexType: { case PM4ItOpcode::IndexType: {
@ -586,8 +589,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
case PM4ItOpcode::EventWrite: { case PM4ItOpcode::EventWrite: {
const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header); const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
LOG_DEBUG(Render_Vulkan, LOG_DEBUG(Render, "Encountered EventWrite: event_type = {}, event_index = {}",
"Encountered EventWrite: event_type = {}, event_index = {}",
magic_enum::enum_name(event->event_type.Value()), magic_enum::enum_name(event->event_type.Value()),
magic_enum::enum_name(event->event_index.Value())); magic_enum::enum_name(event->event_index.Value()));
if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) { if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
@ -673,6 +675,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} }
break; break;
} }
case PM4ItOpcode::CopyData: {
const auto* copy_data = reinterpret_cast<const PM4CmdCopyData*>(header);
LOG_WARNING(Render,
"unhandled IT_COPY_DATA src_sel = {}, dst_sel = {}, "
"count_sel = {}, wr_confirm = {}, engine_sel = {}",
u32(copy_data->src_sel.Value()), u32(copy_data->dst_sel.Value()),
copy_data->count_sel.Value(), copy_data->wr_confirm.Value(),
u32(copy_data->engine_sel.Value()));
break;
}
case PM4ItOpcode::MemSemaphore: { case PM4ItOpcode::MemSemaphore: {
const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header); const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
if (mem_semaphore->IsSignaling()) { if (mem_semaphore->IsSignaling()) {
@ -756,6 +768,19 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
LOG_WARNING(Render_Vulkan, "Unimplemented IT_GET_LOD_STATS"); LOG_WARNING(Render_Vulkan, "Unimplemented IT_GET_LOD_STATS");
break; break;
} }
case PM4ItOpcode::CondExec: {
const auto* cond_exec = reinterpret_cast<const PM4CmdCondExec*>(header);
if (cond_exec->command.Value() != 0) {
LOG_WARNING(Render, "IT_COND_EXEC used a reserved command");
}
const auto skip = *cond_exec->Address() == false;
if (skip) {
dcb = NextPacket(dcb,
header->type3.NumWords() + 1 + cond_exec->exec_count.Value());
continue;
}
break;
}
default: default:
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast<u32>(opcode), count); static_cast<u32>(opcode), count);
@ -804,6 +829,19 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
break; break;
} }
if (header->type == 2) {
// Type-2 packet are used for padding purposes
next_dw_off = 1;
acb += next_dw_off;
acb_dwords -= next_dw_off;
if constexpr (!is_indirect) {
*queue.read_addr += next_dw_off;
*queue.read_addr %= queue.ring_size_dw;
}
continue;
}
if (header->type != 3) { if (header->type != 3) {
// No other types of packets were spotted so far // No other types of packets were spotted so far
UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value()); UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value());

View File

@ -914,7 +914,7 @@ struct Liverpool {
} }
size_t GetColorSliceSize() const { size_t GetColorSliceSize() const {
const auto num_bytes_per_element = NumBits(info.format) / 8u; const auto num_bytes_per_element = NumBitsPerBlock(info.format) / 8u;
const auto slice_size = const auto slice_size =
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples(); num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
return slice_size; return slice_size;
@ -1179,8 +1179,16 @@ struct Liverpool {
}; };
union GsMode { union GsMode {
enum class Mode : u32 {
Off = 0,
ScenarioA = 1,
ScenarioB = 2,
ScenarioG = 3,
ScenarioC = 4,
};
u32 raw; u32 raw;
BitField<0, 3, u32> mode; BitField<0, 3, Mode> mode;
BitField<3, 2, u32> cut_mode; BitField<3, 2, u32> cut_mode;
BitField<22, 2, u32> onchip; BitField<22, 2, u32> onchip;
}; };

View File

@ -111,136 +111,106 @@ std::string_view NameOf(NumberFormat fmt) {
} }
} }
int NumComponents(DataFormat format) { static constexpr std::array NUM_COMPONENTS = {
constexpr std::array num_components_per_element = { 0, // 0 FormatInvalid
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 1, // 1 Format8
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, 1, // 2 Format16
-1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1}; 2, // 3 Format8_8
1, // 4 Format32
const u32 index = static_cast<u32>(format); 2, // 5 Format16_16
if (index >= num_components_per_element.size()) { 3, // 6 Format10_11_11
return 0; 3, // 7 Format11_11_10
} 4, // 8 Format10_10_10_2
return num_components_per_element[index]; 4, // 9 Format2_10_10_10
} 4, // 10 Format8_8_8_8
2, // 11 Format32_32
int NumBits(DataFormat format) { 4, // 12 Format16_16_16_16
const std::array num_bits_per_element = { 3, // 13 Format32_32_32
0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, 16, 16, 16, 16, 32, 4, // 14 Format32_32_32_32
32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 32, 4, 8, 8, 4, 8, 8, 8, 0, // 15
-1, -1, 8, 8, 8, 8, 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1}; 3, // 16 Format5_6_5
4, // 17 Format1_5_5_5
const u32 index = static_cast<u32>(format); 4, // 18 Format5_5_5_1
if (index >= num_bits_per_element.size()) { 4, // 19 Format4_4_4_4
return 0; 2, // 20 Format8_24
} 2, // 21 Format24_8
return num_bits_per_element[index]; 2, // 22 FormatX24_8_32
} 0, // 23
0, // 24
static constexpr std::array component_bits = { 0, // 25
std::array{0, 0, 0, 0}, // 0 FormatInvalid 0, // 26
std::array{8, 0, 0, 0}, // 1 Format8 0, // 27
std::array{16, 0, 0, 0}, // 2 Format16 0, // 28
std::array{8, 8, 0, 0}, // 3 Format8_8 0, // 29
std::array{32, 0, 0, 0}, // 4 Format32 0, // 30
std::array{16, 16, 0, 0}, // 5 Format16_16 0, // 31
std::array{11, 11, 10, 0}, // 6 Format10_11_11 3, // 32 FormatGB_GR
std::array{10, 11, 11, 0}, // 7 Format11_11_10 3, // 33 FormatBG_RG
std::array{2, 10, 10, 10}, // 8 Format10_10_10_2 4, // 34 Format5_9_9_9
std::array{10, 10, 10, 2}, // 9 Format2_10_10_10 4, // 35 FormatBc1
std::array{8, 8, 8, 8}, // 10 Format8_8_8_8 4, // 36 FormatBc2
std::array{32, 32, 0, 0}, // 11 Format32_32 4, // 37 FormatBc3
std::array{16, 16, 16, 16}, // 12 Format16_16_16_16 1, // 38 FormatBc4
std::array{32, 32, 32, 0}, // 13 Format32_32_32 2, // 39 FormatBc5
std::array{32, 32, 32, 32}, // 14 Format32_32_32_32 3, // 40 FormatBc6
std::array{0, 0, 0, 0}, // 15 4, // 41 FormatBc7
std::array{5, 6, 5, 0}, // 16 Format5_6_5
std::array{5, 5, 5, 1}, // 17 Format1_5_5_5
std::array{1, 5, 5, 5}, // 18 Format5_5_5_1
std::array{4, 4, 4, 4}, // 19 Format4_4_4_4
std::array{24, 8, 0, 0}, // 20 Format8_24
std::array{8, 24, 0, 0}, // 21 Format24_8
std::array{8, 24, 0, 0}, // 22 FormatX24_8_32
std::array{0, 0, 0, 0}, // 23
std::array{0, 0, 0, 0}, // 24
std::array{0, 0, 0, 0}, // 25
std::array{0, 0, 0, 0}, // 26
std::array{0, 0, 0, 0}, // 27
std::array{0, 0, 0, 0}, // 28
std::array{0, 0, 0, 0}, // 29
std::array{0, 0, 0, 0}, // 30
std::array{0, 0, 0, 0}, // 31
std::array{0, 0, 0, 0}, // 32 FormatGB_GR
std::array{0, 0, 0, 0}, // 33 FormatBG_RG
std::array{0, 0, 0, 0}, // 34 Format5_9_9_9
std::array{0, 0, 0, 0}, // 35 FormatBc1
std::array{0, 0, 0, 0}, // 36 FormatBc2
std::array{0, 0, 0, 0}, // 37 FormatBc3
std::array{0, 0, 0, 0}, // 38 FormatBc4
std::array{0, 0, 0, 0}, // 39 FormatBc5
std::array{0, 0, 0, 0}, // 40 FormatBc6
std::array{0, 0, 0, 0}, // 41 FormatBc7
}; };
u32 ComponentBits(DataFormat format, u32 comp) { u32 NumComponents(DataFormat format) {
const u32 index = static_cast<u32>(format); const u32 index = static_cast<u32>(format);
if (index >= component_bits.size() || comp >= 4) { ASSERT_MSG(index < NUM_COMPONENTS.size(), "Invalid data format = {}", format);
return 0; return NUM_COMPONENTS[index];
}
return component_bits[index][comp];
} }
static constexpr std::array component_offset = { static constexpr std::array BITS_PER_BLOCK = {
std::array{-1, -1, -1, -1}, // 0 FormatInvalid 0, // 0 FormatInvalid
std::array{0, -1, -1, -1}, // 1 Format8 8, // 1 Format8
std::array{0, -1, -1, -1}, // 2 Format16 16, // 2 Format16
std::array{0, 8, -1, -1}, // 3 Format8_8 16, // 3 Format8_8
std::array{0, -1, -1, -1}, // 4 Format32 32, // 4 Format32
std::array{0, 16, -1, -1}, // 5 Format16_16 32, // 5 Format16_16
std::array{0, 11, 22, -1}, // 6 Format10_11_11 32, // 6 Format10_11_11
std::array{0, 10, 21, -1}, // 7 Format11_11_10 32, // 7 Format11_11_10
std::array{0, 2, 12, 22}, // 8 Format10_10_10_2 32, // 8 Format10_10_10_2
std::array{0, 10, 20, 30}, // 9 Format2_10_10_10 32, // 9 Format2_10_10_10
std::array{0, 8, 16, 24}, // 10 Format8_8_8_8 32, // 10 Format8_8_8_8
std::array{0, 32, -1, -1}, // 11 Format32_32 64, // 11 Format32_32
std::array{0, 16, 32, 48}, // 12 Format16_16_16_16 64, // 12 Format16_16_16_16
std::array{0, 32, 64, -1}, // 13 Format32_32_32 96, // 13 Format32_32_32
std::array{0, 32, 64, 96}, // 14 Format32_32_32_32 128, // 14 Format32_32_32_32
std::array{-1, -1, -1, -1}, // 15 0, // 15
std::array{0, 5, 11, -1}, // 16 Format5_6_5 16, // 16 Format5_6_5
std::array{0, 5, 10, 15}, // 17 Format1_5_5_5 16, // 17 Format1_5_5_5
std::array{0, 1, 6, 11}, // 18 Format5_5_5_1 16, // 18 Format5_5_5_1
std::array{0, 4, 8, 12}, // 19 Format4_4_4_4 16, // 19 Format4_4_4_4
std::array{0, 24, -1, -1}, // 20 Format8_24 32, // 20 Format8_24
std::array{0, 8, -1, -1}, // 21 Format24_8 32, // 21 Format24_8
std::array{0, 8, -1, -1}, // 22 FormatX24_8_32 64, // 22 FormatX24_8_32
std::array{-1, -1, -1, -1}, // 23 0, // 23
std::array{-1, -1, -1, -1}, // 24 0, // 24
std::array{-1, -1, -1, -1}, // 25 0, // 25
std::array{-1, -1, -1, -1}, // 26 0, // 26
std::array{-1, -1, -1, -1}, // 27 0, // 27
std::array{-1, -1, -1, -1}, // 28 0, // 28
std::array{-1, -1, -1, -1}, // 29 0, // 29
std::array{-1, -1, -1, -1}, // 30 0, // 30
std::array{-1, -1, -1, -1}, // 31 0, // 31
std::array{-1, -1, -1, -1}, // 32 FormatGB_GR 16, // 32 FormatGB_GR
std::array{-1, -1, -1, -1}, // 33 FormatBG_RG 16, // 33 FormatBG_RG
std::array{-1, -1, -1, -1}, // 34 Format5_9_9_9 32, // 34 Format5_9_9_9
std::array{-1, -1, -1, -1}, // 35 FormatBc1 64, // 35 FormatBc1
std::array{-1, -1, -1, -1}, // 36 FormatBc2 128, // 36 FormatBc2
std::array{-1, -1, -1, -1}, // 37 FormatBc3 128, // 37 FormatBc3
std::array{-1, -1, -1, -1}, // 38 FormatBc4 64, // 38 FormatBc4
std::array{-1, -1, -1, -1}, // 39 FormatBc5 128, // 39 FormatBc5
std::array{-1, -1, -1, -1}, // 40 FormatBc6 128, // 40 FormatBc6
std::array{-1, -1, -1, -1}, // 41 FormatBc7 128, // 41 FormatBc7
}; };
s32 ComponentOffset(DataFormat format, u32 comp) { u32 NumBitsPerBlock(DataFormat format) {
const u32 index = static_cast<u32>(format); const u32 index = static_cast<u32>(format);
if (index >= component_offset.size() || comp >= 4) { ASSERT_MSG(index < BITS_PER_BLOCK.size(), "Invalid data format = {}", format);
return -1; return BITS_PER_BLOCK[index];
}
return component_offset[index][comp];
} }
} // namespace AmdGpu } // namespace AmdGpu

View File

@ -5,39 +5,313 @@
#include <string_view> #include <string_view>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/amdgpu/types.h"
namespace AmdGpu { namespace AmdGpu {
enum NumberClass { // Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
enum class DataFormat : u32 {
FormatInvalid = 0,
Format8 = 1,
Format16 = 2,
Format8_8 = 3,
Format32 = 4,
Format16_16 = 5,
Format10_11_11 = 6,
Format11_11_10 = 7,
Format10_10_10_2 = 8,
Format2_10_10_10 = 9,
Format8_8_8_8 = 10,
Format32_32 = 11,
Format16_16_16_16 = 12,
Format32_32_32 = 13,
Format32_32_32_32 = 14,
Format5_6_5 = 16,
Format1_5_5_5 = 17,
Format5_5_5_1 = 18,
Format4_4_4_4 = 19,
Format8_24 = 20,
Format24_8 = 21,
FormatX24_8_32 = 22,
FormatGB_GR = 32,
FormatBG_RG = 33,
Format5_9_9_9 = 34,
FormatBc1 = 35,
FormatBc2 = 36,
FormatBc3 = 37,
FormatBc4 = 38,
FormatBc5 = 39,
FormatBc6 = 40,
FormatBc7 = 41,
FormatFmask8_1 = 47,
FormatFmask8_2 = 48,
FormatFmask8_4 = 49,
FormatFmask16_1 = 50,
FormatFmask16_2 = 51,
FormatFmask32_2 = 52,
FormatFmask32_4 = 53,
FormatFmask32_8 = 54,
FormatFmask64_4 = 55,
FormatFmask64_8 = 56,
Format4_4 = 57,
Format6_5_5 = 58,
Format1 = 59,
Format1_Reversed = 60,
Format32_As_8 = 61,
Format32_As_8_8 = 62,
Format32_As_32_32_32_32 = 63,
};
enum class NumberFormat : u32 {
Unorm = 0,
Snorm = 1,
Uscaled = 2,
Sscaled = 3,
Uint = 4,
Sint = 5,
SnormNz = 6,
Float = 7,
Srgb = 9,
Ubnorm = 10,
UbnormNz = 11,
Ubint = 12,
Ubscaled = 13,
};
enum class NumberClass {
Float, Float,
Sint, Sint,
Uint, Uint,
}; };
[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) { enum class CompSwizzle : u8 {
switch (nfmt) { Zero = 0,
case NumberFormat::Sint: One = 1,
return Sint; Red = 4,
case NumberFormat::Uint: Green = 5,
return Uint; Blue = 6,
Alpha = 7,
};
enum class NumberConversion : u32 {
None = 0,
UintToUscaled = 1,
SintToSscaled = 2,
UnormToUbnorm = 3,
Sint8ToSnormNz = 4,
Sint16ToSnormNz = 5,
Uint32ToUnorm = 6,
};
struct CompMapping {
CompSwizzle r;
CompSwizzle g;
CompSwizzle b;
CompSwizzle a;
auto operator<=>(const CompMapping& other) const = default;
template <typename T>
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
return {
ApplySingle(data, r),
ApplySingle(data, g),
ApplySingle(data, b),
ApplySingle(data, a),
};
}
[[nodiscard]] CompMapping Inverse() const {
CompMapping result{};
InverseSingle(result.r, CompSwizzle::Red);
InverseSingle(result.g, CompSwizzle::Green);
InverseSingle(result.b, CompSwizzle::Blue);
InverseSingle(result.a, CompSwizzle::Alpha);
return result;
}
private:
template <typename T>
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
switch (swizzle) {
case CompSwizzle::Zero:
return T(0);
case CompSwizzle::One:
return T(1);
case CompSwizzle::Red:
return data[0];
case CompSwizzle::Green:
return data[1];
case CompSwizzle::Blue:
return data[2];
case CompSwizzle::Alpha:
return data[3];
default:
UNREACHABLE();
}
}
void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
if (r == target) {
dst = CompSwizzle::Red;
} else if (g == target) {
dst = CompSwizzle::Green;
} else if (b == target) {
dst = CompSwizzle::Blue;
} else if (a == target) {
dst = CompSwizzle::Alpha;
} else {
dst = CompSwizzle::Zero;
}
}
};
static constexpr CompMapping IdentityMapping = {
.r = CompSwizzle::Red,
.g = CompSwizzle::Green,
.b = CompSwizzle::Blue,
.a = CompSwizzle::Alpha,
};
constexpr DataFormat RemapDataFormat(const DataFormat format) {
switch (format) {
case DataFormat::Format11_11_10:
return DataFormat::Format10_11_11;
case DataFormat::Format10_10_10_2:
return DataFormat::Format2_10_10_10;
case DataFormat::Format5_5_5_1:
return DataFormat::Format1_5_5_5;
default: default:
return Float; return format;
} }
} }
[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) { constexpr NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
switch (format) {
case NumberFormat::Unorm: {
switch (data_format) {
case DataFormat::Format32:
case DataFormat::Format32_32:
case DataFormat::Format32_32_32:
case DataFormat::Format32_32_32_32:
return NumberFormat::Uint;
default:
return format;
}
}
case NumberFormat::Uscaled:
return NumberFormat::Uint;
case NumberFormat::Sscaled:
case NumberFormat::SnormNz:
return NumberFormat::Sint;
case NumberFormat::Ubnorm:
return NumberFormat::Unorm;
case NumberFormat::Float:
if (data_format == DataFormat::Format8) {
// Games may ask for 8-bit float when they want to access the stencil component
// of a depth-stencil image. Change to unsigned int to match the stencil format.
// This is also the closest approximation to pass the bits through unconverted.
return NumberFormat::Uint;
}
[[fallthrough]];
default:
return format;
}
}
constexpr CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
switch (format) {
case DataFormat::Format1_5_5_5:
case DataFormat::Format11_11_10: {
CompMapping result;
result.r = swizzle.b;
result.g = swizzle.g;
result.b = swizzle.r;
result.a = swizzle.a;
return result;
}
case DataFormat::Format10_10_10_2: {
CompMapping result;
result.r = swizzle.a;
result.g = swizzle.b;
result.b = swizzle.g;
result.a = swizzle.r;
return result;
}
case DataFormat::Format4_4_4_4: {
// Remap to a more supported component order.
CompMapping result;
result.r = swizzle.g;
result.g = swizzle.b;
result.b = swizzle.a;
result.a = swizzle.r;
return result;
}
default:
return swizzle;
}
}
constexpr NumberConversion MapNumberConversion(const NumberFormat num_fmt,
const DataFormat data_fmt) {
switch (num_fmt) {
case NumberFormat::Unorm: {
switch (data_fmt) {
case DataFormat::Format32:
case DataFormat::Format32_32:
case DataFormat::Format32_32_32:
case DataFormat::Format32_32_32_32:
return NumberConversion::Uint32ToUnorm;
default:
return NumberConversion::None;
}
}
case NumberFormat::Uscaled:
return NumberConversion::UintToUscaled;
case NumberFormat::Sscaled:
return NumberConversion::SintToSscaled;
case NumberFormat::Ubnorm:
return NumberConversion::UnormToUbnorm;
case NumberFormat::SnormNz: {
switch (data_fmt) {
case DataFormat::Format8:
case DataFormat::Format8_8:
case DataFormat::Format8_8_8_8:
return NumberConversion::Sint8ToSnormNz;
case DataFormat::Format16:
case DataFormat::Format16_16:
case DataFormat::Format16_16_16_16:
return NumberConversion::Sint16ToSnormNz;
default:
UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
}
}
default:
return NumberConversion::None;
}
}
constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
switch (nfmt) {
case NumberFormat::Sint:
return NumberClass::Sint;
case NumberFormat::Uint:
return NumberClass::Uint;
default:
return NumberClass::Float;
}
}
constexpr bool IsInteger(const NumberFormat nfmt) {
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint; return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
} }
[[nodiscard]] std::string_view NameOf(DataFormat fmt); std::string_view NameOf(DataFormat fmt);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt); std::string_view NameOf(NumberFormat fmt);
int NumComponents(DataFormat format); u32 NumComponents(DataFormat format);
int NumBits(DataFormat format); u32 NumBitsPerBlock(DataFormat format);
u32 ComponentBits(DataFormat format, u32 comp);
s32 ComponentOffset(DataFormat format, u32 comp);
} // namespace AmdGpu } // namespace AmdGpu

View File

@ -554,6 +554,61 @@ struct PM4DmaData {
} }
}; };
enum class CopyDataSrc : u32 {
MappedRegister = 0,
Memory = 1,
TCL2 = 2,
Gds = 3,
// Reserved = 4,
Immediate = 5,
Atomic = 6,
GdsAtomic0 = 7,
GdsAtomic1 = 8,
GpuClock = 9,
};
enum class CopyDataDst : u32 {
MappedRegister = 0,
MemorySync = 1,
TCL2 = 2,
Gds = 3,
// Reserved = 4,
MemoryAsync = 5,
};
enum class CopyDataEngine : u32 {
Me = 0,
Pfp = 1,
Ce = 2,
// Reserved = 3
};
struct PM4CmdCopyData {
PM4Type3Header header;
union {
BitField<0, 4, CopyDataSrc> src_sel;
BitField<8, 4, CopyDataDst> dst_sel;
BitField<16, 1, u32> count_sel;
BitField<20, 1, u32> wr_confirm;
BitField<30, 2, CopyDataEngine> engine_sel;
u32 control;
};
u32 src_addr_lo;
u32 src_addr_hi;
u32 dst_addr_lo;
u32 dst_addr_hi;
template <typename T>
T SrcAddress() const {
return std::bit_cast<T>(src_addr_lo | u64(src_addr_hi) << 32);
}
template <typename T>
T DstAddress() const {
return std::bit_cast<T>(dst_addr_lo | u64(dst_addr_hi) << 32);
}
};
struct PM4CmdRewind { struct PM4CmdRewind {
PM4Type3Header header; PM4Type3Header header;
union { union {
@ -1104,4 +1159,25 @@ struct PM4CmdMemSemaphore {
} }
}; };
struct PM4CmdCondExec {
PM4Type3Header header;
union {
BitField<2, 30, u32> bool_addr_lo; ///< low 32 address bits for the block in memory from
///< where the CP will fetch the condition
};
union {
BitField<0, 16, u32> bool_addr_hi; ///< high address bits for the condition
BitField<28, 4, u32> command;
};
union {
BitField<0, 14, u32> exec_count; ///< Number of DWords that the CP will skip
///< if bool pointed to is zero
};
bool* Address() const {
return std::bit_cast<bool*>(u64(bool_addr_hi.Value()) << 32 | u64(bool_addr_lo.Value())
<< 2);
}
};
} // namespace AmdGpu } // namespace AmdGpu

View File

@ -6,7 +6,6 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu { namespace AmdGpu {

View File

@ -5,7 +5,6 @@
#include <string_view> #include <string_view>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h" #include "common/types.h"
namespace AmdGpu { namespace AmdGpu {
@ -114,281 +113,6 @@ enum class GsOutputPrimitiveType : u32 {
TriangleStrip = 2, TriangleStrip = 2,
}; };
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
enum class DataFormat : u32 {
FormatInvalid = 0,
Format8 = 1,
Format16 = 2,
Format8_8 = 3,
Format32 = 4,
Format16_16 = 5,
Format10_11_11 = 6,
Format11_11_10 = 7,
Format10_10_10_2 = 8,
Format2_10_10_10 = 9,
Format8_8_8_8 = 10,
Format32_32 = 11,
Format16_16_16_16 = 12,
Format32_32_32 = 13,
Format32_32_32_32 = 14,
Format5_6_5 = 16,
Format1_5_5_5 = 17,
Format5_5_5_1 = 18,
Format4_4_4_4 = 19,
Format8_24 = 20,
Format24_8 = 21,
FormatX24_8_32 = 22,
FormatGB_GR = 32,
FormatBG_RG = 33,
Format5_9_9_9 = 34,
FormatBc1 = 35,
FormatBc2 = 36,
FormatBc3 = 37,
FormatBc4 = 38,
FormatBc5 = 39,
FormatBc6 = 40,
FormatBc7 = 41,
FormatFmask8_1 = 47,
FormatFmask8_2 = 48,
FormatFmask8_4 = 49,
FormatFmask16_1 = 50,
FormatFmask16_2 = 51,
FormatFmask32_2 = 52,
FormatFmask32_4 = 53,
FormatFmask32_8 = 54,
FormatFmask64_4 = 55,
FormatFmask64_8 = 56,
Format4_4 = 57,
Format6_5_5 = 58,
Format1 = 59,
Format1_Reversed = 60,
Format32_As_8 = 61,
Format32_As_8_8 = 62,
Format32_As_32_32_32_32 = 63,
};
enum class NumberFormat : u32 {
Unorm = 0,
Snorm = 1,
Uscaled = 2,
Sscaled = 3,
Uint = 4,
Sint = 5,
SnormNz = 6,
Float = 7,
Srgb = 9,
Ubnorm = 10,
UbnormNz = 11,
Ubint = 12,
Ubscaled = 13,
};
enum class CompSwizzle : u8 {
Zero = 0,
One = 1,
Red = 4,
Green = 5,
Blue = 6,
Alpha = 7,
};
enum class NumberConversion : u32 {
None = 0,
UintToUscaled = 1,
SintToSscaled = 2,
UnormToUbnorm = 3,
Sint8ToSnormNz = 4,
Sint16ToSnormNz = 5,
Uint32ToUnorm = 6,
};
struct CompMapping {
CompSwizzle r;
CompSwizzle g;
CompSwizzle b;
CompSwizzle a;
auto operator<=>(const CompMapping& other) const = default;
template <typename T>
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
return {
ApplySingle(data, r),
ApplySingle(data, g),
ApplySingle(data, b),
ApplySingle(data, a),
};
}
[[nodiscard]] CompMapping Inverse() const {
CompMapping result{};
InverseSingle(result.r, CompSwizzle::Red);
InverseSingle(result.g, CompSwizzle::Green);
InverseSingle(result.b, CompSwizzle::Blue);
InverseSingle(result.a, CompSwizzle::Alpha);
return result;
}
private:
template <typename T>
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
switch (swizzle) {
case CompSwizzle::Zero:
return T(0);
case CompSwizzle::One:
return T(1);
case CompSwizzle::Red:
return data[0];
case CompSwizzle::Green:
return data[1];
case CompSwizzle::Blue:
return data[2];
case CompSwizzle::Alpha:
return data[3];
default:
UNREACHABLE();
}
}
void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
if (r == target) {
dst = CompSwizzle::Red;
} else if (g == target) {
dst = CompSwizzle::Green;
} else if (b == target) {
dst = CompSwizzle::Blue;
} else if (a == target) {
dst = CompSwizzle::Alpha;
} else {
dst = CompSwizzle::Zero;
}
}
};
static constexpr CompMapping IdentityMapping = {
.r = CompSwizzle::Red,
.g = CompSwizzle::Green,
.b = CompSwizzle::Blue,
.a = CompSwizzle::Alpha,
};
inline DataFormat RemapDataFormat(const DataFormat format) {
switch (format) {
case DataFormat::Format11_11_10:
return DataFormat::Format10_11_11;
case DataFormat::Format10_10_10_2:
return DataFormat::Format2_10_10_10;
case DataFormat::Format5_5_5_1:
return DataFormat::Format1_5_5_5;
default:
return format;
}
}
inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
switch (format) {
case NumberFormat::Unorm: {
switch (data_format) {
case DataFormat::Format32:
case DataFormat::Format32_32:
case DataFormat::Format32_32_32:
case DataFormat::Format32_32_32_32:
return NumberFormat::Uint;
default:
return format;
}
}
case NumberFormat::Uscaled:
return NumberFormat::Uint;
case NumberFormat::Sscaled:
case NumberFormat::SnormNz:
return NumberFormat::Sint;
case NumberFormat::Ubnorm:
return NumberFormat::Unorm;
case NumberFormat::Float:
if (data_format == DataFormat::Format8) {
// Games may ask for 8-bit float when they want to access the stencil component
// of a depth-stencil image. Change to unsigned int to match the stencil format.
// This is also the closest approximation to pass the bits through unconverted.
return NumberFormat::Uint;
}
[[fallthrough]];
default:
return format;
}
}
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
switch (format) {
case DataFormat::Format1_5_5_5:
case DataFormat::Format11_11_10: {
CompMapping result;
result.r = swizzle.b;
result.g = swizzle.g;
result.b = swizzle.r;
result.a = swizzle.a;
return result;
}
case DataFormat::Format10_10_10_2: {
CompMapping result;
result.r = swizzle.a;
result.g = swizzle.b;
result.b = swizzle.g;
result.a = swizzle.r;
return result;
}
case DataFormat::Format4_4_4_4: {
// Remap to a more supported component order.
CompMapping result;
result.r = swizzle.g;
result.g = swizzle.b;
result.b = swizzle.a;
result.a = swizzle.r;
return result;
}
default:
return swizzle;
}
}
inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) {
switch (num_fmt) {
case NumberFormat::Unorm: {
switch (data_fmt) {
case DataFormat::Format32:
case DataFormat::Format32_32:
case DataFormat::Format32_32_32:
case DataFormat::Format32_32_32_32:
return NumberConversion::Uint32ToUnorm;
default:
return NumberConversion::None;
}
}
case NumberFormat::Uscaled:
return NumberConversion::UintToUscaled;
case NumberFormat::Sscaled:
return NumberConversion::SintToSscaled;
case NumberFormat::Ubnorm:
return NumberConversion::UnormToUbnorm;
case NumberFormat::SnormNz: {
switch (data_fmt) {
case DataFormat::Format8:
case DataFormat::Format8_8:
case DataFormat::Format8_8_8_8:
return NumberConversion::Sint8ToSnormNz;
case DataFormat::Format16:
case DataFormat::Format16_16:
case DataFormat::Format16_16_16_16:
return NumberConversion::Sint16ToSnormNz;
default:
UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
}
}
default:
return NumberConversion::None;
}
}
} // namespace AmdGpu } // namespace AmdGpu
template <> template <>

View File

@ -6,6 +6,7 @@
#include "common/debug.h" #include "common/debug.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/types.h" #include "common/types.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/host_shaders/fault_buffer_process_comp.h" #include "video_core/host_shaders/fault_buffer_process_comp.h"
@ -28,7 +29,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_, Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
TextureCache& texture_cache_, PageManager& tracker_) TextureCache& texture_cache_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_}, : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
texture_cache{texture_cache_}, tracker{tracker_}, memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize), download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
@ -293,7 +294,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
if (!is_gds && !IsRegionRegistered(address, num_bytes)) { if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
memcpy(std::bit_cast<void*>(address), value, num_bytes); memcpy(std::bit_cast<void*>(address), value, num_bytes);
return; return;
} }
@ -365,7 +366,9 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
return ObtainBuffer(gpu_addr, size, false, false); return ObtainBuffer(gpu_addr, size, false, false);
} }
// In all other cases, just do a CPU copy to the staging buffer. // In all other cases, just do a CPU copy to the staging buffer.
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); const auto [data, offset] = staging_buffer.Map(size, 16);
memory->CopySparseMemory(gpu_addr, data, size);
staging_buffer.Commit();
return {&staging_buffer, offset}; return {&staging_buffer, offset};
} }
@ -798,24 +801,45 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
} }
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
static constexpr FindFlags find_flags = boost::container::small_vector<ImageId, 6> image_ids;
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
TextureCache::BaseDesc desc{}; if (image.info.guest_address != device_addr) {
desc.info.guest_address = device_addr; return;
desc.info.guest_size = size; }
const ImageId image_id = texture_cache.FindImage(desc, find_flags); // Only perform sync if image is:
if (!image_id) { // - GPU modified; otherwise there are no changes to synchronize.
// - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
// - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
if (False(image.flags & ImageFlagBits::GpuModified) ||
True(image.flags & ImageFlagBits::Dirty)) {
return;
}
image_ids.push_back(image_id);
});
if (image_ids.empty()) {
return false; return false;
} }
ImageId image_id{};
if (image_ids.size() == 1) {
// Sometimes image size might not exactly match with requested buffer size
// If we only found 1 candidate image use it without too many questions.
image_id = image_ids[0];
} else {
for (s32 i = 0; i < image_ids.size(); ++i) {
Image& image = texture_cache.GetImage(image_ids[i]);
if (image.info.guest_size == size) {
image_id = image_ids[i];
break;
}
}
if (!image_id) {
LOG_WARNING(Render_Vulkan,
"Failed to find exact image match for copy addr={:#x}, size={:#x}",
device_addr, size);
return false;
}
}
Image& image = texture_cache.GetImage(image_id); Image& image = texture_cache.GetImage(image_id);
// Only perform sync if image is:
// - GPU modified; otherwise there are no changes to synchronize.
// - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
// - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
if (False(image.flags & ImageFlagBits::GpuModified) ||
True(image.flags & ImageFlagBits::Dirty)) {
return false;
}
ASSERT_MSG(device_addr == image.info.guest_address, ASSERT_MSG(device_addr == image.info.guest_address,
"Texel buffer aliases image subresources {:x} : {:x}", device_addr, "Texel buffer aliases image subresources {:x} : {:x}", device_addr,
image.info.guest_address); image.info.guest_address);

View File

@ -17,6 +17,10 @@ namespace AmdGpu {
struct Liverpool; struct Liverpool;
} }
namespace Core {
class MemoryManager;
}
namespace Shader { namespace Shader {
namespace Gcn { namespace Gcn {
struct FetchShaderData; struct FetchShaderData;
@ -183,6 +187,7 @@ private:
Vulkan::Scheduler& scheduler; Vulkan::Scheduler& scheduler;
Vulkan::Rasterizer& rasterizer; Vulkan::Rasterizer& rasterizer;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
TextureCache& texture_cache; TextureCache& texture_cache;
PageManager& tracker; PageManager& tracker;
StreamBuffer staging_buffer; StreamBuffer staging_buffer;

View File

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels; uint num_levels;
uint pitch; uint pitch;
uint height; uint height;
uint sizes[14]; uint sizes[16];
} info; } info;
// Inverse morton LUT, small enough to fit into K$ // Inverse morton LUT, small enough to fit into K$

View File

@ -18,7 +18,7 @@ layout(push_constant) uniform image_info {
uint num_levels; uint num_levels;
uint pitch; uint pitch;
uint height; uint height;
uint sizes[14]; uint sizes[16];
} info; } info;
#define MICRO_TILE_DIM 8 #define MICRO_TILE_DIM 8

View File

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels; uint num_levels;
uint pitch; uint pitch;
uint height; uint height;
uint sizes[14]; uint sizes[16];
} info; } info;
// Inverse morton LUT, small enough to fit into K$ // Inverse morton LUT, small enough to fit into K$

View File

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels; uint num_levels;
uint pitch; uint pitch;
uint height; uint height;
uint sizes[14]; uint sizes[16];
} info; } info;
// Inverse morton LUT, small enough to fit into K$ // Inverse morton LUT, small enough to fit into K$

View File

@ -19,7 +19,7 @@ layout(push_constant) uniform image_info {
uint num_levels; uint num_levels;
uint pitch; uint pitch;
uint height; uint height;
uint sizes[14]; uint sizes[16];
} info; } info;
#define MICRO_TILE_DIM 8 #define MICRO_TILE_DIM 8

View File

@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
} }
} }
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor;
switch (factor) {
case BlendFactor::Src1Color:
case BlendFactor::Src1Alpha:
case BlendFactor::InvSrc1Color:
case BlendFactor::InvSrc1Alpha:
return true;
default:
return false;
}
}
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
using BlendFunc = Liverpool::BlendControl::BlendFunc; using BlendFunc = Liverpool::BlendControl::BlendFunc;
switch (func) { switch (func) {

View File

@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor); vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);

View File

@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR, vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>(); vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
features = feature_chain.get().features; features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2< const vk::StructureChain properties_chain = physical_device.getProperties2<
@ -283,6 +284,20 @@ bool Instance::CreateDevice() {
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}", LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax); shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
} }
workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
if (workgroup_memory_explicit_layout) {
workgroup_memory_explicit_layout_features =
feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout);
LOG_INFO(
Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
}
const bool calibrated_timestamps = const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -420,6 +435,15 @@ bool Instance::CreateDevice() {
.shaderImageFloat32AtomicMinMax = .shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax, shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
}, },
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
.workgroupMemoryExplicitLayout =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
.workgroupMemoryExplicitLayoutScalarBlockLayout =
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout,
.workgroupMemoryExplicitLayout16BitAccess =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
},
#ifdef __APPLE__ #ifdef __APPLE__
portability_features, portability_features,
#endif #endif
@ -452,6 +476,9 @@ bool Instance::CreateDevice() {
if (!shader_atomic_float2) { if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
} }
if (!workgroup_memory_explicit_layout) {
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) { if (device_result != vk::Result::eSuccess) {

View File

@ -171,6 +171,12 @@ public:
return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax; return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
} }
/// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
bool IsWorkgroupMemoryExplicitLayoutSupported() const {
return workgroup_memory_explicit_layout &&
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
}
/// Returns true when geometry shaders are supported by the device /// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const { bool IsGeometryStageSupported() const {
return features.geometryShader; return features.geometryShader;
@ -349,6 +355,8 @@ private:
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features; vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features; vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features; vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
workgroup_memory_explicit_layout_features;
vk::DriverIdKHR driver_id; vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{}; vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name; std::string vendor_name;
@ -374,6 +382,7 @@ private:
bool amd_gcn_shader{}; bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{}; bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{}; bool shader_atomic_float2{};
bool workgroup_memory_explicit_layout{};
bool portability_subset{}; bool portability_subset{};
}; };

View File

@ -146,6 +146,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
} }
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
gs_info.mode = regs.vgt_gs_mode.mode;
const auto params_vc = Liverpool::GetParams(regs.vs_program); const auto params_vc = Liverpool::GetParams(regs.vs_program);
gs_info.vs_copy = params_vc.code; gs_info.vs_copy = params_vc.code;
gs_info.vs_copy_hash = params_vc.hash; gs_info.vs_copy_hash = params_vc.hash;
@ -158,6 +159,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
info.fs_info.addr_flags = regs.ps_input_addr; info.fs_info.addr_flags = regs.ps_input_addr;
const auto& ps_inputs = regs.ps_inputs; const auto& ps_inputs = regs.ps_inputs;
info.fs_info.num_inputs = regs.num_interp; info.fs_info.num_inputs = regs.num_interp;
const auto& cb0_blend = regs.blend_control[0];
info.fs_info.dual_source_blending =
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
if (cb0_blend.separate_alpha_blend) {
info.fs_info.dual_source_blending |=
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
}
for (u32 i = 0; i < regs.num_interp; i++) { for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs[i] = { info.fs_info.inputs[i] = {
.param_index = u8(ps_inputs[i].input_offset.Value()), .param_index = u8(ps_inputs[i].input_offset.Value()),
@ -200,7 +210,6 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32), .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
@ -208,6 +217,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed. // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(), .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(), .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.supports_workgroup_explicit_memory_layout =
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||

View File

@ -0,0 +1,220 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2015-2023 The Khronos Group Inc.
// Copyright © 2015-2023 Valve Corporation
// Copyright © 2015-2023 LunarG, Inc.
#include <unordered_map>
#include "common/enum.h"
#include "video_core/texture_cache/host_compatibility.h"
namespace VideoCore {
/**
* @brief All classes of format compatibility according to the Vulkan specification
* @url
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.h#L47-L131
*/
enum class CompatibilityClass {
NONE = 0,
_128BIT = 1 << 0,
_16BIT = 1 << 1,
_192BIT = 1 << 2,
_24BIT = 1 << 3,
_256BIT = 1 << 4,
_32BIT = 1 << 5,
_48BIT = 1 << 6,
_64BIT = 1 << 7,
_8BIT = 1 << 8,
_96BIT = 1 << 9,
BC1_RGB = 1 << 10,
BC1_RGBA = 1 << 11,
BC2 = 1 << 12,
BC3 = 1 << 13,
BC4 = 1 << 14,
BC5 = 1 << 15,
BC6H = 1 << 16,
BC7 = 1 << 17,
D16 = 1 << 18,
D16S8 = 1 << 19,
D24 = 1 << 20,
D24S8 = 1 << 21,
D32 = 1 << 22,
D32S8 = 1 << 23,
S8 = 1 << 24,
};
DECLARE_ENUM_FLAG_OPERATORS(CompatibilityClass)
/**
* @brief The format compatibility class according to the Vulkan specification
* @url
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
* @url
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.cpp#L70-L812
*/
// Maps every format this texture cache may alias to its compatibility-class bit set.
// Formats not listed here (ASTC, ETC2, YCbCr/multi-planar, …) are intentionally absent.
// Block-compressed entries also carry the matching uncompressed size class so that
// BC <-> uncompressed views of the same texel size are considered compatible.
static const std::unordered_map<vk::Format, CompatibilityClass> FORMAT_TABLE = {
    {vk::Format::eA1R5G5B5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA2B10G10R10SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA4B4G4R4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA4R4G4B4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA8B8G8R8SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SrgbPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eB10G11R11UfloatPack32, CompatibilityClass::_32BIT},
    {vk::Format::eB4G4R4A4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB5G5R5A1UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB5G6R5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB8G8R8A8Sint, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Srgb, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Uint, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8Sint, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Snorm, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Srgb, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Sscaled, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Uint, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Unorm, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Uscaled, CompatibilityClass::_24BIT},
    // Block-compressed formats: class bit plus the per-block size class (64 or 128 bits).
    {vk::Format::eBc1RgbaSrgbBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbaUnormBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbSrgbBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbUnormBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
    {vk::Format::eBc2SrgbBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
    {vk::Format::eBc2UnormBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
    {vk::Format::eBc3SrgbBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
    {vk::Format::eBc3UnormBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
    {vk::Format::eBc4SnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
    {vk::Format::eBc4UnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
    {vk::Format::eBc5SnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
    {vk::Format::eBc5UnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
    {vk::Format::eBc6HSfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
    {vk::Format::eBc6HUfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
    {vk::Format::eBc7SrgbBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
    {vk::Format::eBc7UnormBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
    // Depth/stencil formats: exact-match classes only.
    {vk::Format::eD16Unorm, CompatibilityClass::D16},
    {vk::Format::eD16UnormS8Uint, CompatibilityClass::D16S8},
    {vk::Format::eD24UnormS8Uint, CompatibilityClass::D24S8},
    {vk::Format::eD32Sfloat, CompatibilityClass::D32},
    {vk::Format::eD32SfloatS8Uint, CompatibilityClass::D32S8},
    {vk::Format::eE5B9G9R9UfloatPack32, CompatibilityClass::_32BIT},
    {vk::Format::eR10X6G10X6Unorm2Pack16, CompatibilityClass::_32BIT},
    {vk::Format::eR10X6UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR12X4G12X4Unorm2Pack16, CompatibilityClass::_32BIT},
    {vk::Format::eR12X4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR16G16B16A16Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Snorm, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Sscaled, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Unorm, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Uscaled, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16Sfloat, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Sint, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Snorm, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Sscaled, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Uint, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Unorm, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Uscaled, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16Sfloat, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR16Sfloat, CompatibilityClass::_16BIT},
    {vk::Format::eR16Sint, CompatibilityClass::_16BIT},
    {vk::Format::eR16Snorm, CompatibilityClass::_16BIT},
    {vk::Format::eR16Sscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR16Uint, CompatibilityClass::_16BIT},
    {vk::Format::eR16Unorm, CompatibilityClass::_16BIT},
    {vk::Format::eR16Uscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR32G32B32A32Sfloat, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32A32Sint, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32A32Uint, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32Sfloat, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32B32Sint, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32B32Uint, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR32G32Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR32G32Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR32Sfloat, CompatibilityClass::_32BIT},
    {vk::Format::eR32Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR32Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR4G4B4A4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR4G4UnormPack8, CompatibilityClass::_8BIT},
    {vk::Format::eR5G5B5A1UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR5G6B5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR64G64B64A64Sfloat, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64A64Sint, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64A64Uint, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64Sfloat, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64B64Sint, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64B64Uint, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64Sfloat, CompatibilityClass::_128BIT},
    {vk::Format::eR64G64Sint, CompatibilityClass::_128BIT},
    {vk::Format::eR64G64Uint, CompatibilityClass::_128BIT},
    {vk::Format::eR64Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR64Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR64Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR8G8B8A8Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Srgb, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8Sint, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Snorm, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Srgb, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Sscaled, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Uint, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Unorm, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Uscaled, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8Sint, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Snorm, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Srgb, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Sscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Uint, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Unorm, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Uscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR8Sint, CompatibilityClass::_8BIT},
    {vk::Format::eR8Snorm, CompatibilityClass::_8BIT},
    {vk::Format::eR8Srgb, CompatibilityClass::_8BIT},
    {vk::Format::eR8Sscaled, CompatibilityClass::_8BIT},
    {vk::Format::eR8Uint, CompatibilityClass::_8BIT},
    {vk::Format::eR8Unorm, CompatibilityClass::_8BIT},
    {vk::Format::eR8Uscaled, CompatibilityClass::_8BIT},
    {vk::Format::eS8Uint, CompatibilityClass::S8},
    {vk::Format::eX8D24UnormPack32, CompatibilityClass::D24},
    {vk::Format::eUndefined, CompatibilityClass::NONE},
};
/**
 * @brief Checks whether a view format may legally alias an image of the base format,
 *        per Vulkan's format compatibility classes.
 * @param base Format the image was created with.
 * @param view Format requested for the image view.
 * @return True when the formats are identical, or when every compatibility-class bit of
 *         the view format is also present on the base format.
 */
bool IsVulkanFormatCompatible(vk::Format base, vk::Format view) {
    if (base == view) {
        return true;
    }
    // Formats absent from FORMAT_TABLE (e.g. ASTC or multi-planar formats) would throw
    // std::out_of_range with unordered_map::at(); treat them as incompatible instead.
    const auto base_it = FORMAT_TABLE.find(base);
    const auto view_it = FORMAT_TABLE.find(view);
    if (base_it == FORMAT_TABLE.end() || view_it == FORMAT_TABLE.end()) {
        return false;
    }
    const auto view_comp = view_it->second;
    return (base_it->second & view_comp) == view_comp;
}
} // namespace VideoCore

View File

@ -6,387 +6,11 @@
#pragma once #pragma once
#include <unordered_map>
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore { namespace VideoCore {
/**
* @brief All classes of format compatibility according to the Vulkan specification
* @url
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.h#L47-L131
* @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
* conventions
*/
// Plain sequential enumerators (NOT bit flags): two formats are compatible exactly when
// they map to the same enumerator, so compatibility is tested with operator==.
enum class FORMAT_COMPATIBILITY_CLASS {
    NONE = 0,
    _10BIT_2PLANE_420,
    _10BIT_2PLANE_422,
    _10BIT_2PLANE_444,
    _10BIT_3PLANE_420,
    _10BIT_3PLANE_422,
    _10BIT_3PLANE_444,
    _12BIT_2PLANE_420,
    _12BIT_2PLANE_422,
    _12BIT_2PLANE_444,
    _12BIT_3PLANE_420,
    _12BIT_3PLANE_422,
    _12BIT_3PLANE_444,
    _128BIT,
    _16BIT,
    _16BIT_2PLANE_420,
    _16BIT_2PLANE_422,
    _16BIT_2PLANE_444,
    _16BIT_3PLANE_420,
    _16BIT_3PLANE_422,
    _16BIT_3PLANE_444,
    _192BIT,
    _24BIT,
    _256BIT,
    _32BIT,
    _32BIT_B8G8R8G8,
    _32BIT_G8B8G8R8,
    _48BIT,
    _64BIT,
    _64BIT_B10G10R10G10,
    _64BIT_B12G12R12G12,
    _64BIT_B16G16R16G16,
    _64BIT_G10B10G10R10,
    _64BIT_G12B12G12R12,
    _64BIT_G16B16G16R16,
    _64BIT_R10G10B10A10,
    _64BIT_R12G12B12A12,
    _8BIT,
    _8BIT_2PLANE_420,
    _8BIT_2PLANE_422,
    _8BIT_2PLANE_444,
    _8BIT_3PLANE_420,
    _8BIT_3PLANE_422,
    _8BIT_3PLANE_444,
    _96BIT,
    ASTC_10X10,
    ASTC_10X5,
    ASTC_10X6,
    ASTC_10X8,
    ASTC_12X10,
    ASTC_12X12,
    ASTC_4X4,
    ASTC_5X4,
    ASTC_5X5,
    ASTC_6X5,
    ASTC_6X6,
    ASTC_8X5,
    ASTC_8X6,
    ASTC_8X8,
    BC1_RGB,
    BC1_RGBA,
    BC2,
    BC3,
    BC4,
    BC5,
    BC6H,
    BC7,
    D16,
    D16S8,
    D24,
    D24S8,
    D32,
    D32S8,
    EAC_R,
    EAC_RG,
    ETC2_EAC_RGBA,
    ETC2_RGB,
    ETC2_RGBA,
    PVRTC1_2BPP,
    PVRTC1_4BPP,
    PVRTC2_2BPP,
    PVRTC2_4BPP,
    S8
};
/** /// Returns true if the two formats are compatible according to Vulkan's format compatibility rules
* @brief The format compatibility class according to the Vulkan specification bool IsVulkanFormatCompatible(vk::Format base, vk::Format view);
* @url
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
* @url
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.cpp#L70-L812
* @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
* conventions
*/
// Maps each VkFormat to its single compatibility class; lookups with formats missing from
// this table throw std::out_of_range (see IsVulkanFormatCompatible below).
static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatClassTable{
    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A2B10G10R10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A8B8G8R8_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SRGB_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_10x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_10x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_12x12_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_12x12_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_4x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_4x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_5x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_5x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_6x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_6x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_ASTC_8x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_ASTC_8x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_B10G11R11_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_B10G10R10G10},
    {VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_B12G12R12G12},
    {VK_FORMAT_B16G16R16G16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_B16G16R16G16},
    {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B5G6R5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B8G8R8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8G8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_B8G8R8G8},
    {VK_FORMAT_B8G8R8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_BC1_RGBA_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
    {VK_FORMAT_BC1_RGBA_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
    {VK_FORMAT_BC1_RGB_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
    {VK_FORMAT_BC1_RGB_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
    {VK_FORMAT_BC2_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
    {VK_FORMAT_BC2_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
    {VK_FORMAT_BC3_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
    {VK_FORMAT_BC3_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
    {VK_FORMAT_BC4_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
    {VK_FORMAT_BC4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
    {VK_FORMAT_BC5_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
    {VK_FORMAT_BC5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
    {VK_FORMAT_BC6H_SFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
    {VK_FORMAT_BC6H_UFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
    {VK_FORMAT_BC7_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
    {VK_FORMAT_BC7_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
    {VK_FORMAT_D16_UNORM, FORMAT_COMPATIBILITY_CLASS::D16},
    {VK_FORMAT_D16_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D16S8},
    {VK_FORMAT_D24_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D24S8},
    {VK_FORMAT_D32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::D32},
    {VK_FORMAT_D32_SFLOAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D32S8},
    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_EAC_R11G11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
    {VK_FORMAT_EAC_R11G11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
    {VK_FORMAT_EAC_R11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
    {VK_FORMAT_EAC_R11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
    {VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
    {VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
    {VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
    {VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_G10B10G10R10},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_420},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_422},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_444},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_420},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_422},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_444},
    {VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_G12B12G12R12},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_420},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_422},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_444},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_420},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_422},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_444},
    {VK_FORMAT_G16B16G16R16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_G16B16G16R16},
    {VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_420},
    {VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_422},
    {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_444},
    {VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_420},
    {VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_422},
    {VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_444},
    {VK_FORMAT_G8B8G8R8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_G8B8G8R8},
    {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_420},
    {VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_422},
    {VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_444},
    {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_420},
    {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_422},
    {VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_444},
    {VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
    {VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
    {VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
    {VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
    {VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
    {VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
    {VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
    {VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
    {VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R10G10B10A10},
    {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R10X6_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R12G12B12A12},
    {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R12X4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16G16B16A16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_USCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_UINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_UNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_USCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R32G32B32A32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R32G32B32A32_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R32G32B32A32_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R32G32B32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
    {VK_FORMAT_R32G32B32_SINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
    {VK_FORMAT_R32G32B32_UINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
    {VK_FORMAT_R32G32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R32G32_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R32G32_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R32_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R32_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R4G4B4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R4G4_UNORM_PACK8, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R5G6B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R64G64B64A64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
    {VK_FORMAT_R64G64B64A64_SINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
    {VK_FORMAT_R64G64B64A64_UINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
    {VK_FORMAT_R64G64B64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
    {VK_FORMAT_R64G64B64_SINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
    {VK_FORMAT_R64G64B64_UINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
    {VK_FORMAT_R64G64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R64G64_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R64G64_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
    {VK_FORMAT_R64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R64_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R64_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R8G8B8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R8G8B8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8B8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_R8G8_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_SRGB, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8G8_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R8_SINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_UINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
    {VK_FORMAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::S8},
    {VK_FORMAT_X8_D24_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::D24},
    {VK_FORMAT_UNDEFINED, FORMAT_COMPATIBILITY_CLASS::NONE},
};
/**
* @return If the two formats are compatible according to Vulkan's format compatibility rules
* @url
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
*/
/**
 * @return If the two formats are compatible according to Vulkan's format compatibility rules
 * @url
 * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
 */
static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
    if (lhs == rhs) {
        return true;
    }
    // Formats missing from the table would throw std::out_of_range with unordered_map::at();
    // report them as incompatible instead so an unexpected format cannot crash the caller.
    const auto lhs_it = vkFormatClassTable.find(VkFormat(lhs));
    const auto rhs_it = vkFormatClassTable.find(VkFormat(rhs));
    if (lhs_it == vkFormatClassTable.end() || rhs_it == vkFormatClassTable.end()) {
        return false;
    }
    return lhs_it->second == rhs_it->second;
}
} // namespace VideoCore } // namespace VideoCore

View File

@ -14,62 +14,6 @@ namespace VideoCore {
using namespace Vulkan; using namespace Vulkan;
bool ImageInfo::IsBlockCoded() const {
switch (pixel_format) {
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc1RgbSrgbBlock:
case vk::Format::eBc1RgbUnormBlock:
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc4SnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eBc5SnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc6HSfloatBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
return true;
default:
return false;
}
}
/// Returns true when the image uses a packed 16-bit color format.
bool ImageInfo::IsPacked() const {
    switch (pixel_format) {
    // [[fallthrough]] between adjacent case labels was redundant (no intervening
    // statement); plain grouped labels match the sibling predicates' style.
    case vk::Format::eB5G5R5A1UnormPack16:
    case vk::Format::eB5G6R5UnormPack16:
        return true;
    default:
        return false;
    }
}
/// Returns true when the image uses a depth or depth-stencil format.
bool ImageInfo::IsDepthStencil() const {
    switch (pixel_format) {
    case vk::Format::eD16Unorm:
    case vk::Format::eD16UnormS8Uint:
    // eD24UnormS8Uint was missing here even though HasStencil() reports it as carrying
    // stencil; add it (and the D24 variant) so the predicates stay consistent.
    case vk::Format::eX8D24UnormPack32:
    case vk::Format::eD24UnormS8Uint:
    case vk::Format::eD32Sfloat:
    case vk::Format::eD32SfloatS8Uint:
        return true;
    default:
        return false;
    }
}
/// Returns true when the image format carries a stencil aspect alongside depth.
/// NOTE(review): pure-stencil eS8Uint is deliberately not listed here, matching the
/// original behavior — confirm with callers if that is intended.
bool ImageInfo::HasStencil() const {
    switch (pixel_format) {
    case vk::Format::eD16UnormS8Uint:
    case vk::Format::eD24UnormS8Uint:
    case vk::Format::eD32SfloatS8Uint:
        return true;
    default:
        return false;
    }
}
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eTransferDst |
@ -161,6 +105,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.props.is_volume) { if (info.props.is_volume) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
} }
// Not supported by MoltenVK.
if (info.props.is_block && instance->GetDriverID() != vk::DriverId::eMoltenvk) {
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
usage_flags = ImageUsageFlags(info); usage_flags = ImageUsageFlags(info);
format_features = FormatFeatureFlags(usage_flags); format_features = FormatFeatureFlags(usage_flags);
@ -372,9 +320,9 @@ void Image::CopyImage(const Image& image) {
boost::container::small_vector<vk::ImageCopy, 14> image_copy{}; boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
for (u32 m = 0; m < image.info.resources.levels; ++m) { for (u32 m = 0; m < image.info.resources.levels; ++m) {
const auto mip_w = std::max(info.size.width >> m, 1u); const auto mip_w = std::max(image.info.size.width >> m, 1u);
const auto mip_h = std::max(info.size.height >> m, 1u); const auto mip_h = std::max(image.info.size.height >> m, 1u);
const auto mip_d = std::max(info.size.depth >> m, 1u); const auto mip_d = std::max(image.info.size.depth >> m, 1u);
image_copy.emplace_back(vk::ImageCopy{ image_copy.emplace_back(vk::ImageCopy{
.srcSubresource{ .srcSubresource{

View File

@ -81,7 +81,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
tiling_mode = buffer.GetTilingMode(); tiling_mode = buffer.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
num_samples = buffer.NumSamples(); num_samples = buffer.NumSamples();
num_bits = NumBits(buffer.GetDataFmt()); num_bits = NumBitsPerBlock(buffer.GetDataFmt());
type = vk::ImageType::e2D; type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height(); size.height = hint.Valid() ? hint.height : buffer.Height();
@ -142,7 +142,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
resources.levels = image.NumLevels(); resources.levels = image.NumLevels();
resources.layers = image.NumLayers(); resources.layers = image.NumLayers();
num_samples = image.NumSamples(); num_samples = image.NumSamples();
num_bits = NumBits(image.GetDataFmt()); num_bits = NumBitsPerBlock(image.GetDataFmt());
guest_address = image.Address(); guest_address = image.Address();
@ -152,6 +152,80 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
UpdateSize(); UpdateSize();
} }
// Returns true iff pixel_format is one of the BC1-BC7 block-compressed
// Vulkan formats (4x4 texel blocks).
bool ImageInfo::IsBlockCoded() const {
switch (pixel_format) {
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc1RgbSrgbBlock:
case vk::Format::eBc1RgbUnormBlock:
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc4SnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eBc5SnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc6HSfloatBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
return true;
default:
return false;
}
}
// Returns true for the 16-bit packed color formats (B5G5R5A1, B5G6R5).
bool ImageInfo::IsPacked() const {
switch (pixel_format) {
case vk::Format::eB5G5R5A1UnormPack16:
// NOTE(review): [[fallthrough]] between adjacent empty case labels is
// redundant — adjacent labels fall through implicitly.
[[fallthrough]];
case vk::Format::eB5G6R5UnormPack16:
return true;
default:
return false;
}
}
// Returns true for depth and combined depth-stencil formats.
// NOTE(review): eD24UnormS8Uint is treated as having stencil by
// HasStencil() but is not listed here — confirm whether the omission
// is intentional (e.g. D24S8 remapped before reaching this point).
bool ImageInfo::IsDepthStencil() const {
switch (pixel_format) {
case vk::Format::eD16Unorm:
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD32Sfloat:
case vk::Format::eD32SfloatS8Uint:
return true;
default:
return false;
}
}
// Returns true when the format carries a stencil aspect (the combined
// depth-stencil formats D32S8, D24S8 and D16S8).
bool ImageInfo::HasStencil() const {
if (pixel_format == vk::Format::eD32SfloatS8Uint ||
pixel_format == vk::Format::eD24UnormS8Uint ||
pixel_format == vk::Format::eD16UnormS8Uint) {
return true;
}
return false;
}
// Verifies if images are compatible for subresource merging: they must
// agree on exact format, sample count and bits-per-block.
bool ImageInfo::IsCompatible(const ImageInfo& info) const {
    if (pixel_format != info.pixel_format) {
        return false;
    }
    if (num_samples != info.num_samples) {
        return false;
    }
    return num_bits == info.num_bits;
}
// Two tiling-mode indices are compatible when identical, or when they are
// the specific pair 0x0d/0x0e in either order.
// NOTE(review): 0x0d/0x0e are raw tiling-register encodings — confirm
// their meaning against the AMD tiling-mode (ARRAY_MODE) table and
// consider naming them.
bool ImageInfo::IsTilingCompatible(u32 lhs, u32 rhs) const {
    if (lhs == rhs) {
        return true;
    }
    return (lhs == 0x0d && rhs == 0x0e) || (lhs == 0x0e && rhs == 0x0d);
}
void ImageInfo::UpdateSize() { void ImageInfo::UpdateSize() {
mips_layout.clear(); mips_layout.clear();
MipInfo mip_info{}; MipInfo mip_info{};
@ -163,7 +237,6 @@ void ImageInfo::UpdateSize() {
if (props.is_block) { if (props.is_block) {
mip_w = (mip_w + 3) / 4; mip_w = (mip_w + 3) / 4;
mip_h = (mip_h + 3) / 4; mip_h = (mip_h + 3) / 4;
bpp *= 16;
} }
mip_w = std::max(mip_w, 1u); mip_w = std::max(mip_w, 1u);
mip_h = std::max(mip_h, 1u); mip_h = std::max(mip_h, 1u);

View File

@ -25,6 +25,11 @@ struct ImageInfo {
bool IsTiled() const { bool IsTiled() const {
return tiling_mode != AmdGpu::TilingMode::Display_Linear; return tiling_mode != AmdGpu::TilingMode::Display_Linear;
} }
// Image dimensions measured in compression blocks: width/height divided
// by 4 (via shift, flooring) for block-compressed images, unchanged
// otherwise; depth is never block-divided.
// NOTE(review): this floors with `>> 2` while UpdateSize() rounds up with
// (dim + 3) / 4 — confirm the flooring comparison is intended for the
// overlap checks that call this.
Extent3D BlockDim() const {
const u32 shift = props.is_block ? 2 : 0;
return Extent3D{size.width >> shift, size.height >> shift, size.depth};
}
bool IsBlockCoded() const; bool IsBlockCoded() const;
bool IsPacked() const; bool IsPacked() const;
bool IsDepthStencil() const; bool IsDepthStencil() const;
@ -33,24 +38,8 @@ struct ImageInfo {
s32 MipOf(const ImageInfo& info) const; s32 MipOf(const ImageInfo& info) const;
s32 SliceOf(const ImageInfo& info, s32 mip) const; s32 SliceOf(const ImageInfo& info, s32 mip) const;
/// Verifies if images are compatible for subresource merging. bool IsCompatible(const ImageInfo& info) const;
bool IsCompatible(const ImageInfo& info) const { bool IsTilingCompatible(u32 lhs, u32 rhs) const;
return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
num_bits == info.num_bits);
}
bool IsTilingCompatible(u32 lhs, u32 rhs) const {
if (lhs == rhs) {
return true;
}
if (lhs == 0x0e && rhs == 0x0d) {
return true;
}
if (lhs == 0x0d && rhs == 0x0e) {
return true;
}
return false;
}
void UpdateSize(); void UpdateSize();

View File

@ -199,7 +199,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval; scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
if (image_info.size != tex_cache_image.info.size) { if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
image_info.num_bits != tex_cache_image.info.num_bits) {
// Very likely this kind of overlap is caused by allocation from a pool. // Very likely this kind of overlap is caused by allocation from a pool.
if (safe_to_delete) { if (safe_to_delete) {
FreeImage(cache_image_id); FreeImage(cache_image_id);
@ -211,15 +212,19 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
return {depth_image_id, -1, -1}; return {depth_image_id, -1, -1};
} }
if (image_info.IsBlockCoded() && !tex_cache_image.info.IsBlockCoded()) {
// Compressed view of uncompressed image with same block size.
// We need to recreate the image with compressed format and copy.
return {ExpandImage(image_info, cache_image_id), -1, -1};
}
if (image_info.pixel_format != tex_cache_image.info.pixel_format || if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size <= tex_cache_image.info.guest_size) { image_info.guest_size <= tex_cache_image.info.guest_size) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id; auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id]; const auto& result_image = slot_images[result_id];
return { const bool is_compatible =
IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format) IsVulkanFormatCompatible(result_image.info.pixel_format, image_info.pixel_format);
? result_id return {is_compatible ? result_id : ImageId{}, -1, -1};
: ImageId{},
-1, -1};
} }
if (image_info.type == tex_cache_image.info.type && if (image_info.type == tex_cache_image.info.type &&
@ -299,6 +304,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
auto& new_image = slot_images[new_image_id]; auto& new_image = slot_images[new_image_id];
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
RefreshImage(new_image);
new_image.CopyImage(src_image); new_image.CopyImage(src_image);
if (src_image.binding.is_bound || src_image.binding.is_target) { if (src_image.binding.is_bound || src_image.binding.is_target) {
@ -339,7 +345,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
continue; continue;
} }
if (False(flags & FindFlags::RelaxFmt) && if (False(flags & FindFlags::RelaxFmt) &&
(!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format) || (!IsVulkanFormatCompatible(cache_image.info.pixel_format, info.pixel_format) ||
(cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) { (cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) {
continue; continue;
} }
@ -511,9 +517,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
// So this calculation should be very uncommon and reasonably fast // So this calculation should be very uncommon and reasonably fast
// For now we'll just check up to 64 first pixels // For now we'll just check up to 64 first pixels
const auto addr = std::bit_cast<u8*>(image.info.guest_address); const auto addr = std::bit_cast<u8*>(image.info.guest_address);
const auto w = std::min(image.info.size.width, u32(8)); const u32 w = std::min(image.info.size.width, u32(8));
const auto h = std::min(image.info.size.height, u32(8)); const u32 h = std::min(image.info.size.height, u32(8));
const auto size = w * h * image.info.num_bits / 8; const u32 size = w * h * image.info.num_bits >> (3 + image.info.props.is_block ? 4 : 0);
const u64 hash = XXH3_64bits(addr, size); const u64 hash = XXH3_64bits(addr, size);
if (image.hash == hash) { if (image.hash == hash) {
image.flags &= ~ImageFlagBits::MaybeCpuDirty; image.flags &= ~ImageFlagBits::MaybeCpuDirty;

View File

@ -25,10 +25,9 @@
namespace VideoCore { namespace VideoCore {
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
switch (info.tiling_mode) { switch (info.tiling_mode) {
case AmdGpu::TilingMode::Texture_MicroTiled: case AmdGpu::TilingMode::Texture_MicroTiled:
switch (bpp) { switch (info.num_bits) {
case 8: case 8:
return &detilers[DetilerType::Micro8]; return &detilers[DetilerType::Micro8];
case 16: case 16:
@ -43,7 +42,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
return nullptr; return nullptr;
} }
case AmdGpu::TilingMode::Texture_Volume: case AmdGpu::TilingMode::Texture_Volume:
switch (bpp) { switch (info.num_bits) {
case 8: case 8:
return &detilers[DetilerType::Macro8]; return &detilers[DetilerType::Macro8];
case 32: case 32:
@ -55,7 +54,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
} }
break; break;
case AmdGpu::TilingMode::Display_MicroTiled: case AmdGpu::TilingMode::Display_MicroTiled:
switch (bpp) { switch (info.num_bits) {
case 64: case 64:
return &detilers[DetilerType::Display_Micro64]; return &detilers[DetilerType::Display_Micro64];
default: default:
@ -71,7 +70,7 @@ struct DetilerParams {
u32 num_levels; u32 num_levels;
u32 pitch0; u32 pitch0;
u32 height; u32 height;
u32 sizes[14]; std::array<u32, 16> sizes;
}; };
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
@ -270,13 +269,16 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
params.height = info.size.height; params.height = info.size.height;
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume || if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume ||
info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) { info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) {
ASSERT(info.resources.levels == 1); if (info.resources.levels != 1) {
LOG_ERROR(Render_Vulkan, "Unexpected mipmaps for volume and display tilings {}",
info.resources.levels);
}
const auto tiles_per_row = info.pitch / 8u; const auto tiles_per_row = info.pitch / 8u;
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
params.sizes[0] = tiles_per_row; params.sizes[0] = tiles_per_row;
params.sizes[1] = tiles_per_slice; params.sizes[1] = tiles_per_slice;
} else { } else {
ASSERT(info.resources.levels <= 14); ASSERT(info.resources.levels <= params.sizes.size());
std::memset(&params.sizes, 0, sizeof(params.sizes)); std::memset(&params.sizes, 0, sizeof(params.sizes));
for (int m = 0; m < info.resources.levels; ++m) { for (int m = 0; m < info.resources.levels; ++m) {
params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0); params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);
@ -287,8 +289,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
&params); &params);
ASSERT((image_size % 64) == 0); ASSERT((image_size % 64) == 0);
const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u); const auto num_tiles = image_size / (64 * (info.num_bits / 8));
const auto num_tiles = image_size / (64 * (bpp / 8));
cmdbuf.dispatch(num_tiles, 1, 1); cmdbuf.dispatch(num_tiles, 1, 1);
return {out_buffer.first, 0}; return {out_buffer.first, 0};
} }