mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 02:24:38 +00:00

Merge branch 'main' into m4aac

This commit is contained in commit dd7c1cbd86
36  .github/workflows/build.yml (vendored)
@@ -76,18 +76,13 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-sdl-cache-cmake-build
         with:
           append-timestamp: false
           key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}

       - name: Setup VS Environment
         uses: ilammy/msvc-dev-cmd@v1.13.0
         with:
           arch: amd64

       - name: Configure CMake
         run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

@@ -111,7 +106,7 @@ jobs:
       - name: Setup Qt
         uses: jurplel/install-qt-action@v4
         with:
-          version: 6.9.0
+          version: 6.9.1
           host: windows
           target: desktop
           arch: win64_msvc2022_64

@@ -130,18 +125,13 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-qt-cache-cmake-build
         with:
           append-timestamp: false
           key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}

       - name: Setup VS Environment
         uses: ilammy/msvc-dev-cmd@v1.13.0
         with:
           arch: amd64

       - name: Configure CMake
         run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

@@ -186,7 +176,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{runner.os}}-sdl-cache-cmake-build
         with:

@@ -228,7 +218,7 @@ jobs:
       - name: Setup Qt
         uses: jurplel/install-qt-action@v4
         with:
-          version: 6.9.0
+          version: 6.9.1
           host: mac
           target: desktop
           arch: clang_64

@@ -247,7 +237,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{runner.os}}-qt-cache-cmake-build
         with:

@@ -301,7 +291,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-sdl-cache-cmake-build
         with:

@@ -362,7 +352,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-qt-cache-cmake-build
         with:

@@ -409,7 +399,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
         with:

@@ -445,7 +435,7 @@ jobs:
             ${{ env.cache-name }}-

       - name: Cache CMake Build
-        uses: hendrikmuhs/ccache-action@v1.2.17
+        uses: hendrikmuhs/ccache-action@v1.2.18
         env:
           cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
         with:

@@ -494,7 +484,7 @@ jobs:
         with:
           token: ${{ secrets.SHADPS4_TOKEN_REPO }}
           name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
-          tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
+          tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}"
           draft: false
           prerelease: true
           body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"

@@ -530,14 +520,14 @@ jobs:

           # Check if release already exists and get ID
           release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
-            "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" | jq -r '.id')
+            "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}" | jq -r '.id')

           if [[ "$release_id" == "null" ]]; then
             echo "Creating release in $REPO for $filename"
             release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
               -H "Accept: application/vnd.github.v3+json" \
               -d '{
-                "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
+                "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}",
                 "name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
                 "draft": false,
                 "prerelease": true,
@@ -966,6 +966,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
           src/video_core/texture_cache/tile_manager.cpp
           src/video_core/texture_cache/tile_manager.h
           src/video_core/texture_cache/types.h
+          src/video_core/texture_cache/host_compatibility.cpp
           src/video_core/texture_cache/host_compatibility.h
           src/video_core/page_manager.cpp
           src/video_core/page_manager.h
@@ -88,7 +88,8 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
            dst_op.reg.value <= ZYDIS_REGISTER_R15;
 }

-static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* operands,
+                              Xbyak::CodeGenerator& c) {
     const auto dst = ZydisToXbyakRegisterOperand(operands[0]);

 #if defined(_WIN32)
@@ -126,7 +127,8 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
     return !cpu.has(Cpu::tSSE4a);
 }

-static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operands,
+                          Xbyak::CodeGenerator& c) {
     bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
                          operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;

@@ -245,7 +247,8 @@ static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
     }
 }

-static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* operands,
+                            Xbyak::CodeGenerator& c) {
     bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
                          operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;

@@ -383,8 +386,44 @@ static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGene
     }
 }

+static void ReplaceMOVNT(void* address, u8 rep_prefix) {
+    // Find the opcode byte
+    // There can be any amount of prefixes but the instruction can't be more than 15 bytes
+    // And we know for sure this is a MOVNTSS/MOVNTSD
+    bool found = false;
+    bool rep_prefix_found = false;
+    int index = 0;
+    u8* ptr = reinterpret_cast<u8*>(address);
+    for (int i = 0; i < 15; i++) {
+        if (ptr[i] == rep_prefix) {
+            rep_prefix_found = true;
+        } else if (ptr[i] == 0x2B) {
+            index = i;
+            found = true;
+            break;
+        }
+    }
+
+    // Some sanity checks
+    ASSERT(found);
+    ASSERT(index >= 2);
+    ASSERT(ptr[index - 1] == 0x0F);
+    ASSERT(rep_prefix_found);
+
+    // This turns the MOVNTSS/MOVNTSD to a MOVSS/MOVSD m, xmm
+    ptr[index] = 0x11;
+}
+
+static void ReplaceMOVNTSS(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+    ReplaceMOVNT(address, 0xF3);
+}
+
+static void ReplaceMOVNTSD(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+    ReplaceMOVNT(address, 0xF2);
+}
+
 using PatchFilter = bool (*)(const ZydisDecodedOperand*);
-using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
+using InstructionGenerator = void (*)(void*, const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
 struct PatchInfo {
     /// Filter for more granular patch conditions past just the instruction mnemonic.
     PatchFilter filter;
@@ -400,6 +439,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
     // SSE4a
     {ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
     {ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
+    {ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}},
+    {ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}},

 #if defined(_WIN32)
     // Windows needs a trampoline.
@@ -477,7 +518,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
         auto& trampoline_gen = module->trampoline_gen;
         const auto trampoline_ptr = trampoline_gen.getCurr();

-        patch_info.generator(operands, trampoline_gen);
+        patch_info.generator(code, operands, trampoline_gen);

         // Return to the following instruction at the end of the trampoline.
         trampoline_gen.jmp(code + instruction.length);
@@ -485,7 +526,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
         // Replace instruction with near jump to the trampoline.
         patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
     } else {
-        patch_info.generator(operands, patch_gen);
+        patch_info.generator(code, operands, patch_gen);
     }

     const auto patch_size = patch_gen.getCurr() - code;
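A note on the ReplaceMOVNT patch above: MOVNTSS and MOVNTSD are encoded as F3 0F 2B and F2 0F 2B respectively, while the plain store forms MOVSS/MOVSD m, xmm use opcode 0F 11 under the same prefixes, so rewriting the single 0x2B opcode byte to 0x11 keeps the operands and drops only the non-temporal hint. A minimal standalone sketch of that byte rewrite (the instruction bytes below are a made-up example, not taken from the patcher):

#include <cassert>
#include <cstdint>

// Rewrites the 0x2B opcode byte of a MOVNTSS/MOVNTSD encoding to 0x11,
// mirroring ReplaceMOVNT. Hypothetical bytes: F2 0F 2B 00 == movntsd [rax], xmm0.
int main() {
    std::uint8_t insn[] = {0xF2, 0x0F, 0x2B, 0x00};
    for (int i = 0; i < 4; ++i) {
        if (insn[i] == 0x2B) {
            insn[i] = 0x11; // now F2 0F 11 00 == movsd [rax], xmm0
            break;
        }
    }
    assert(insn[2] == 0x11);
    return 0;
}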
@@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
                  Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
     LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
                  Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
+    LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
+    LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
+    LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
+    LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
 }

 } // namespace Libraries::Kernel
@@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
     return ret;
 }

-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
-                                                 int flags, const char* name) {
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+                                                 const char* name) {
+    LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
+             fmt::ptr(*addr_in_out), len, prot, flags, name);
     if (len == 0 || !Common::Is16KBAligned(len)) {
         LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
         return ORBIS_KERNEL_ERROR_EINVAL;
@@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
     const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
     const auto mem_prot = static_cast<Core::MemoryProt>(prot);
     const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
-    SCOPE_EXIT {
-        LOG_INFO(Kernel_Vmm,
-                 "in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
-                 in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
-    };
     auto* memory = Core::Memory::Instance();
-    return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
-                             Core::VMAType::Flexible, name);
+    const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
+                                       Core::VMAType::Flexible, name);
+    LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
+    return ret;
 }

-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
-                                            int flags) {
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
     return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
 }

@@ -663,6 +660,9 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
              "PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
              address, size);

+    auto* memory = Core::Memory::Instance();
+    memory->SetPrtArea(id, address, size);
+
     PrtApertures[id] = {address, size};
     return ORBIS_OK;
 }
@@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
 s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
                                        size_t infoSize);
 s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
-                                                 int flags, const char* name);
-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
-                                            int flags);
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+                                                 const char* name);
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
 int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);

 s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
@@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
     // Posix
     LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
     LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
+    LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
     LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
     LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
     LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
@@ -282,7 +282,12 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*

 s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
     LOG_INFO(Lib_VideoOut, "called");
-    *status = driver->GetPort(handle)->resolution;
+    auto* port = driver->GetPort(handle);
+    if (!port || !port->is_open) {
+        return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
+    }
+
+    *status = port->resolution;
     return ORBIS_OK;
 }
@@ -51,7 +51,7 @@ void ZlibTaskThread(const std::stop_token& stop) {
             if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
                 break;
             }
-            task = task_queue.back();
+            task = task_queue.front();
             task_queue.pop();
         }

@@ -136,7 +136,7 @@ s32 PS4_SYSV_ABI sceZlibWaitForDone(u64* request_id, const u32* timeout) {
         } else {
             done_queue_cv.wait(lock, pred);
         }
-        *request_id = done_queue.back();
+        *request_id = done_queue.front();
         done_queue.pop();
     }
     return ORBIS_OK;
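The back()-to-front() changes above fix a FIFO bug: std::queue::pop() always removes the oldest element, so reading back() before pop() processed the newest request while silently discarding the oldest. A small illustration:

#include <cassert>
#include <queue>

int main() {
    std::queue<int> q;
    q.push(1);
    q.push(2);
    // back() is the newest element (2); pop() removes the oldest (1).
    // Reading back() before pop() would handle 2 and silently drop 1.
    assert(q.back() == 2);
    assert(q.front() == 1); // FIFO: front() pairs correctly with pop()
    q.pop();
    assert(q.front() == 2);
    return 0;
}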
@@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
     sr.type = sym_type;

     const auto* record = m_hle_symbols.FindSymbol(sr);
-    if (!record) {
-        // Check if it an export function
-        const auto* p = FindExportedModule(*module, *library);
-        if (p && p->export_sym.GetSize() > 0) {
-            record = p->export_sym.FindSymbol(sr);
-        }
-    }
     if (record) {
         *return_info = *record;
+
+        Core::Devtools::Widget::ModuleList::AddModule(sr.library);
+
         return true;
     }

+    // Check if it an export function
+    const auto* p = FindExportedModule(*module, *library);
+    if (p && p->export_sym.GetSize() > 0) {
+        record = p->export_sym.FindSymbol(sr);
+        if (record) {
+            *return_info = *record;
+            return true;
+        }
+    }
+
     const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
     if (aeronid) {
         return_info->name = aeronid->name;
@@ -95,6 +95,46 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
     return clamped_size;
 }

+void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
+    PrtArea& area = prt_areas[id];
+    if (area.mapped) {
+        rasterizer->UnmapMemory(area.start, area.end - area.start);
+    }
+
+    area.start = address;
+    area.end = address + size;
+    area.mapped = true;
+
+    // Pretend the entire PRT area is mapped to avoid GPU tracking errors.
+    // The caches will use CopySparseMemory to fetch data which avoids unmapped areas.
+    rasterizer->MapMemory(address, size);
+}
+
+void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
+    const bool is_sparse = std::ranges::any_of(
+        prt_areas, [&](const PrtArea& area) { return area.Overlaps(virtual_addr, size); });
+    if (!is_sparse) {
+        std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), size);
+        return;
+    }
+
+    auto vma = FindVMA(virtual_addr);
+    ASSERT_MSG(vma->second.Contains(virtual_addr, 0),
+               "Attempted to access invalid GPU address {:#x}", virtual_addr);
+    while (size) {
+        u64 copy_size = std::min<u64>(vma->second.size - (virtual_addr - vma->first), size);
+        if (vma->second.IsFree()) {
+            std::memset(dest, 0, copy_size);
+        } else {
+            std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), copy_size);
+        }
+        size -= copy_size;
+        virtual_addr += copy_size;
+        dest += copy_size;
+        ++vma;
+    }
+}
+
 bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
     const auto& vma = FindVMA(virtual_addr)->second;
@@ -182,7 +222,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
     auto& area = CarveDmemArea(mapping_start, size)->second;
     area.memory_type = memory_type;
     area.is_free = false;
-    MergeAdjacent(dmem_map, dmem_area);
     return mapping_start;
 }
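CopySparseMemory walks the VMA map so that reads crossing unmapped (sparse) ranges yield zeros instead of faulting. A simplified scalar model of that loop, with a hypothetical three-VMA layout (not emulator code):

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>
#include <map>

// Simplified model of CopySparseMemory: copy from mapped ranges, zero-fill
// free ones. The "VMA map" here is a toy stand-in keyed by start offset.
struct Vma {
    std::size_t size;
    bool is_free;
    const unsigned char* backing; // null when free
};

void CopySparse(std::size_t addr, unsigned char* dest, std::size_t size,
                const std::map<std::size_t, Vma>& vmas) {
    auto it = std::prev(vmas.upper_bound(addr)); // VMA containing addr
    while (size) {
        const std::size_t offset = addr - it->first;
        const std::size_t chunk = std::min(it->second.size - offset, size);
        if (it->second.is_free) {
            std::memset(dest, 0, chunk); // unmapped hole reads as zeros
        } else {
            std::memcpy(dest, it->second.backing + offset, chunk);
        }
        size -= chunk;
        addr += chunk;
        dest += chunk;
        ++it;
    }
}

int main() {
    const unsigned char a[4] = {1, 2, 3, 4};
    const unsigned char b[4] = {5, 6, 7, 8};
    const std::map<std::size_t, Vma> vmas{
        {0, {4, false, a}}, {4, {4, true, nullptr}}, {8, {4, false, b}}};
    unsigned char out[12];
    CopySparse(0, out, 12, vmas);
    assert(out[0] == 1 && out[4] == 0 && out[8] == 5);
    return 0;
}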
@@ -172,6 +172,10 @@ public:

     u64 ClampRangeSize(VAddr virtual_addr, u64 size);

+    void SetPrtArea(u32 id, VAddr address, u64 size);
+
+    void CopySparseMemory(VAddr source, u8* dest, u64 size);
+
     bool TryWriteBacking(void* address, const void* data, u32 num_bytes);

     void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
@@ -275,6 +279,18 @@ private:
     size_t pool_budget{};
     Vulkan::Rasterizer* rasterizer{};

+    struct PrtArea {
+        VAddr start;
+        VAddr end;
+        bool mapped;
+
+        bool Overlaps(VAddr test_address, u64 test_size) const {
+            const VAddr overlap_end = test_address + test_size;
+            return start < overlap_end && test_address < end;
+        }
+    };
+    std::array<PrtArea, 3> prt_areas{};
+
     friend class ::Core::Devtools::Widget::MemoryMapViewer;
 };
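PrtArea::Overlaps is the usual half-open interval intersection test: [start, end) and [test_address, test_address + test_size) intersect exactly when each range begins before the other ends. A minimal check with made-up addresses:

#include <cassert>
#include <cstdint>

// Half-open interval intersection, as in PrtArea::Overlaps above.
static bool Overlaps(std::uint64_t start, std::uint64_t end,
                     std::uint64_t addr, std::uint64_t size) {
    return start < addr + size && addr < end;
}

int main() {
    assert(Overlaps(0x1000, 0x2000, 0x1800, 0x100));  // inside the area
    assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x100)); // touching end: no overlap
    assert(!Overlaps(0x1000, 0x2000, 0x0F00, 0x100)); // touching start: no overlap
    return 0;
}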
@@ -5,6 +5,7 @@

+#include <set>
 #include "common/singleton.h"
 #include "common/types.h"

 namespace Core {
@@ -137,7 +137,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\nYou have reached
         }
     }

-    latestRev = latestVersion.right(7);
+    latestRev = latestVersion.right(40);
     latestDate = jsonObj["published_at"].toString();

     QJsonArray assets = jsonObj["assets"].toArray();
@@ -167,7 +167,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\nYou have reached
     QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
     latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";

-    if (latestRev == currentRev.left(7)) {
+    if (latestRev == currentRev) {
         if (showMessage) {
             QMessageBox::information(this, tr("Auto Updater"),
                                      tr("Your version is already up to date!"));
@@ -215,7 +215,7 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate,
                        "<td>%3</td>"
                        "<td>(%4)</td>"
                        "</tr></table></p>")
-                    .arg(currentRev.left(7), currentDate, latestRev, latestDate);
+                    .arg(currentRev.left(7), currentDate, latestRev.left(7), latestDate);

     QLabel* updateLabel = new QLabel(updateText, this);
     layout->addWidget(updateLabel);
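With latestVersion.right(40) the updater now keeps the full 40-character commit hash and compares it whole, truncating with left(7) only for display. This avoids a false "up to date" when two different commits happen to share a 7-character prefix. A toy std::string equivalent of the truncation (the hash below is fabricated for illustration):

#include <cassert>
#include <string>

int main() {
    // Made-up 40-character hex hash, illustrative only.
    const std::string full = "0123456789abcdef0123456789abcdef01234567";
    const std::string display = full.substr(0, 7); // like QString::left(7)
    assert(display == "0123456");
    // Equality checks should use the full hash, not the display form.
    assert(full != display);
    return 0;
}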
@@ -2048,7 +2048,7 @@
     </message>
     <message>
         <source> * Unsupported Vulkan Version</source>
-        <translation type="unfinished"> * Unsupported Vulkan Version</translation>
+        <translation> * Versão do Vulkan não suportada</translation>
     </message>
 </context>
 <context>

@@ -138,7 +138,7 @@
     </message>
     <message>
         <source>File Exists</source>
-        <translation>Dosya mevcut</translation>
+        <translation>Dosya Mevcut</translation>
     </message>
     <message>
         <source>File already exists. Do you want to replace it?</source>
@@ -1221,7 +1221,7 @@
     </message>
     <message>
         <source>Exit shadPS4</source>
-        <translation>shadPS4'ten Çık</translation>
+        <translation>shadPS4 Çıkış</translation>
     </message>
     <message>
         <source>Exit the application.</source>
@@ -1381,7 +1381,7 @@
     </message>
     <message>
         <source>Game Boot</source>
-        <translation>Oyun Başlatma</translation>
+        <translation>Oyun Başlat</translation>
     </message>
     <message>
         <source>Only one file can be selected!</source>
@@ -303,6 +303,11 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
         ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
         ctx.AddExtension("SPV_KHR_physical_storage_buffer");
     }
+    if (info.uses_shared && profile.supports_workgroup_explicit_memory_layout) {
+        ctx.AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+        ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+        ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+    }
 }

 void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"

@@ -15,42 +17,40 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
 Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const Id shift_id{ctx.ConstU32(2U)};
-    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+    const Id pointer{
+        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+    });
 }

+Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
+                   Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(3U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+    const Id pointer{
+        ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
+    });
+}
+
 Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
                           Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
     const Id shift_id{ctx.ConstU32(2U)};
-    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+    const Id pointer{
+        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
-}
-
-Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
-    if (Sirit::ValidId(buffer_size)) {
-        // Bounds checking enabled, wrap in a conditional branch to make sure that
-        // the atomic is not mistakenly executed when the index is out of bounds.
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
-        const Id ib_label = ctx.OpLabel();
-        const Id oob_label = ctx.OpLabel();
-        const Id end_label = ctx.OpLabel();
-        ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
-        ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
-        ctx.AddLabel(ib_label);
-        const Id ib_result = emit_func();
-        ctx.OpBranch(end_label);
-        ctx.AddLabel(oob_label);
-        const Id oob_result = ctx.u32_zero_value;
-        ctx.OpBranch(end_label);
-        ctx.AddLabel(end_label);
-        return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
-    }
-    // Bounds checking not enabled, just perform the atomic operation.
-    return emit_func();
+    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
+    });
 }

 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
@@ -63,11 +63,42 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
         return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
     });
 }

+Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+                          Id cmp_value,
+                          Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
+    const auto& buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    }
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
+    });
+}
+
+Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+                   Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const auto& buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    }
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
+        return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
+    });
+}
+
 Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
                   Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& texture = ctx.images[handle & 0xFFFF];
@@ -89,6 +120,10 @@ Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
 }

+Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
 Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
 }
@@ -133,6 +168,10 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }

+Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
+}
+
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
 }
@@ -175,6 +214,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
 }

+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+                             Id cmp_value) {
+    return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
+                                  &Sirit::Module::OpAtomicCompareExchange);
+}
+
 Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
     return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
 }
48  src/shader_recompiler/backend/spirv/emit_spirv_bounds.h (new file)
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+
+template <u32 bit_size>
+auto AccessBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    Id zero_value{};
+    Id result_type{};
+    if constexpr (bit_size == 64) {
+        zero_value = ctx.u64_zero_value;
+        result_type = ctx.U64;
+    } else if constexpr (bit_size == 32) {
+        zero_value = ctx.u32_zero_value;
+        result_type = ctx.U32[1];
+    } else if constexpr (bit_size == 16) {
+        zero_value = ctx.u16_zero_value;
+        result_type = ctx.U16;
+    } else {
+        static_assert(false, "type not supported");
+    }
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch to make sure that
+        // the atomic is not mistakenly executed when the index is out of bounds.
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
+        const Id ib_label = ctx.OpLabel();
+        const Id end_label = ctx.OpLabel();
+        ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
+        ctx.OpBranchConditional(in_bounds, ib_label, end_label);
+        const auto last_label = ctx.last_label;
+        ctx.AddLabel(ib_label);
+        const auto ib_result = emit_func();
+        ctx.OpBranch(end_label);
+        ctx.AddLabel(end_label);
+        if (Sirit::ValidId(ib_result)) {
+            return ctx.OpPhi(result_type, ib_result, ib_label, zero_value, last_label);
+        } else {
+            return Id{0};
+        }
+    }
+    // Bounds checking not enabled, just perform the atomic operation.
+    return emit_func();
+}
+
+} // namespace Shader::Backend::SPIRV
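AccessBoundsCheck wraps the guarded operation in OpSelectionMerge/OpBranchConditional and merges the two arms with OpPhi, so out-of-bounds lanes produce zero instead of performing the access. A scalar C++ analogue of the emitted control flow (illustrative only, not emulator code):

#include <cassert>
#include <cstdint>
#include <functional>

// Scalar analogue of AccessBoundsCheck: run the access only when the index
// is in bounds, otherwise yield zero (the other arm of the OpPhi).
static std::uint32_t BoundsCheckedAccess(std::uint32_t index, std::uint32_t size,
                                         const std::function<std::uint32_t()>& access) {
    if (index < size) {
        return access(); // in-bounds branch
    }
    return 0; // out-of-bounds arm of the phi
}

int main() {
    std::uint32_t data[4] = {10, 20, 30, 40};
    assert(BoundsCheckedAccess(2, 4, [&] { return data[2]; }) == 30);
    assert(BoundsCheckedAccess(9, 4, [&] { return 0xDEADu; }) == 0);
    return 0;
}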
@@ -86,6 +86,7 @@ void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -96,6 +97,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
 Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+                             Id cmp_value);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
 Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@@ -118,11 +121,14 @@ Id EmitUndefU8(EmitContext& ctx);
 Id EmitUndefU16(EmitContext& ctx);
 Id EmitUndefU32(EmitContext& ctx);
 Id EmitUndefU64(EmitContext& ctx);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
 Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
 Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
@@ -372,6 +378,7 @@ Id EmitBitCount64(EmitContext& ctx, Id value);
 Id EmitBitwiseNot32(EmitContext& ctx, Id value);
 Id EmitFindSMsb32(EmitContext& ctx, Id value);
 Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb64(EmitContext& ctx, Id value);
 Id EmitFindILsb32(EmitContext& ctx, Id value);
 Id EmitFindILsb64(EmitContext& ctx, Id value);
 Id EmitSMin32(EmitContext& ctx, Id a, Id b);
@@ -229,6 +229,20 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
     return ctx.OpFindUMsb(ctx.U32[1], value);
 }

+Id EmitFindUMsb64(EmitContext& ctx, Id value) {
+    // Vulkan restricts some bitwise operations to 32-bit only, so decompose into
+    // two 32-bit values and select the correct result.
+    const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
+    const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
+    const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
+    const Id hi_msb{ctx.OpFindUMsb(ctx.U32[1], hi)};
+    const Id lo_msb{ctx.OpFindUMsb(ctx.U32[1], lo)};
+    const Id found_hi{ctx.OpINotEqual(ctx.U1[1], hi_msb, ctx.ConstU32(u32(-1)))};
+    const Id shifted_hi{ctx.OpIAdd(ctx.U32[1], hi_msb, ctx.ConstU32(32u))};
+    // value == 0 case is checked in IREmitter
+    return ctx.OpSelect(ctx.U32[1], found_hi, shifted_hi, lo_msb);
+}
+
 Id EmitFindILsb32(EmitContext& ctx, Id value) {
     return ctx.OpFindILsb(ctx.U32[1], value);
 }
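EmitFindUMsb64 works around the 32-bit-only OpFindUMsb by scanning each half and selecting: if the high word has any set bit, its MSB index plus 32 wins; otherwise the low word's result is used. A scalar check of that logic:

#include <cassert>
#include <cstdint>

// Scalar analogue of EmitFindUMsb64: find the most significant set bit of a
// 64-bit value using only 32-bit MSB scans, as the emitted SPIR-V does.
static int FindUMsb32(std::uint32_t v) {
    for (int bit = 31; bit >= 0; --bit) {
        if (v & (1u << bit)) {
            return bit;
        }
    }
    return -1; // matches OpFindUMsb's result for zero
}

static int FindUMsb64(std::uint64_t v) {
    const int hi = FindUMsb32(static_cast<std::uint32_t>(v >> 32));
    const int lo = FindUMsb32(static_cast<std::uint32_t>(v));
    return hi != -1 ? hi + 32 : lo; // v == 0 is handled by the caller (IREmitter)
}

int main() {
    assert(FindUMsb64(1) == 0);
    assert(FindUMsb64(0x100000000ull) == 32);
    assert(FindUMsb64(0x8000000000000000ull) == 63);
    return 0;
}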
@@ -1,43 +1,86 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"

 namespace Shader::Backend::SPIRV {

+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+    const Id shift_id{ctx.ConstU32(1U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
+
+    return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        return ctx.OpLoad(ctx.U16, pointer);
+    });
+}
+
 Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.ConstU32(2U)};
-    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
-    return ctx.OpLoad(ctx.U32[1], pointer);
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+
+    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    });
 }

 Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
-    const Id shift_id{ctx.ConstU32(2U)};
-    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
-    return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
-                                    ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    const Id shift_id{ctx.ConstU32(3U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+
+    return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        return ctx.OpLoad(ctx.U64, pointer);
+    });
 }

+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+    const Id shift{ctx.ConstU32(1U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
+
+    AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        ctx.OpStore(pointer, value);
+        return Id{0};
+    });
+}
+
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.ConstU32(2U)};
-    const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
-    const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
-    ctx.OpStore(pointer, value);
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+
+    AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        ctx.OpStore(pointer, value);
+        return Id{0};
+    });
 }

 void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
-    const Id shift{ctx.ConstU32(2U)};
-    const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
-    const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
-    ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
-    ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+    const Id shift{ctx.ConstU32(3U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+
+    AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        ctx.OpStore(pointer, value);
+        return Id{0};
+    });
 }

 } // namespace Shader::Backend::SPIRV
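A recurring change above is OpShiftRightArithmetic → OpShiftRightLogical when converting a byte offset into an element index. For offsets with the top bit clear the two agree, but the logical shift is the correct choice for unsigned offsets, since an arithmetic shift would sign-extend a value with the top bit set. A scalar comparison (C++20 defines signed right shift as arithmetic):

#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t offset = 0x80000004u; // top bit set
    // Logical shift: divide the byte offset by 4 to get a dword index.
    assert((offset >> 2) == 0x20000001u);
    // Arithmetic shift on the same bit pattern sign-extends instead.
    const std::int32_t signed_offset = static_cast<std::int32_t>(offset);
    assert((signed_offset >> 2) == static_cast<std::int32_t>(0xE0000001u));
    return 0;
}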
@@ -146,6 +146,7 @@ void EmitContext::DefineArithmeticTypes() {
     false_value = ConstantFalse(U1[1]);
     u8_one_value = Constant(U8, 1U);
     u8_zero_value = Constant(U8, 0U);
+    u16_zero_value = Constant(U16, 0U);
     u32_one_value = ConstU32(1U);
     u32_zero_value = ConstU32(0U);
     f32_zero_value = ConstF32(0.0f);
@@ -285,6 +286,8 @@ void EmitContext::DefineBufferProperties() {
         Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
         buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
         Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
+        buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
+        Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
     }
 }
@@ -307,7 +310,9 @@ void EmitContext::DefineInterpolatedAttribs() {
         const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
         const Id p10{OpFSub(F32[4], p1, p0)};
         const Id p20{OpFSub(F32[4], p2, p0)};
-        const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
+        const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
+                                               ? bary_coord_linear_id
+                                               : bary_coord_persp_id)};
         const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
         const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
         const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
@@ -411,8 +416,14 @@ void EmitContext::DefineInputs() {
             DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
         }
         if (profile.needs_manual_interpolation) {
-            gl_bary_coord_id =
-                DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+            if (info.has_perspective_interp) {
+                bary_coord_persp_id =
+                    DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+            }
+            if (info.has_linear_interp) {
+                bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
+                                                      spv::StorageClass::Input);
+            }
         }
         for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
             const auto& input = runtime_info.fs_info.inputs[i];
@@ -435,9 +446,12 @@
             } else {
                 attr_id = DefineInput(type, semantic);
                 Name(attr_id, fmt::format("fs_in_attr{}", semantic));
-            }
-            if (input.is_flat) {
-                Decorate(attr_id, spv::Decoration::Flat);
+
+                if (input.is_flat) {
+                    Decorate(attr_id, spv::Decoration::Flat);
+                } else if (IsLinear(info.interp_qualifiers[i])) {
+                    Decorate(attr_id, spv::Decoration::NoPerspective);
+                }
             }
             input_params[semantic] =
                 GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
@@ -634,7 +648,8 @@ void EmitContext::DefineOutputs() {
         }
         break;
     }
-    case LogicalStage::Fragment:
+    case LogicalStage::Fragment: {
+        u32 num_render_targets = 0;
         for (u32 i = 0; i < IR::NumRenderTargets; i++) {
             const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
             if (!info.stores.GetAny(mrt)) {
@@ -643,11 +658,21 @@
             const u32 num_components = info.stores.NumComponents(mrt);
             const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
             const Id type{GetAttributeType(*this, num_format)[num_components]};
-            const Id id{DefineOutput(type, i)};
+            Id id;
+            if (runtime_info.fs_info.dual_source_blending) {
+                id = DefineOutput(type, 0);
+                Decorate(id, spv::Decoration::Index, i);
+            } else {
+                id = DefineOutput(type, i);
+            }
             Name(id, fmt::format("frag_color{}", i));
             frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
+            ++num_render_targets;
         }
+        ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
+                   "Dual source blending enabled, there must be exactly two MRT exports");
         break;
+    }
     case LogicalStage::Geometry: {
         output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);

@@ -957,13 +982,27 @@ void EmitContext::DefineSharedMemory() {
     }
     ASSERT(info.stage == Stage::Compute);
     const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
-    const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
-    const Id type{TypeArray(U32[1], ConstU32(num_elements))};
-    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
-    shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
-    shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
-    Name(shared_memory_u32, "shared_mem");
-    interfaces.push_back(shared_memory_u32);
+
+    const auto make_type = [&](Id element_type, u32 element_size) {
+        const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
+        const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
+        Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+        const Id struct_type{TypeStruct(array_type)};
+        MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
+        Decorate(struct_type, spv::Decoration::Block);
+
+        const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
+        const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
+        const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
+        Decorate(variable, spv::Decoration::Aliased);
+        interfaces.push_back(variable);
+
+        return std::make_tuple(variable, element_pointer, pointer);
+    };
+    std::tie(shared_memory_u16, shared_u16, shared_memory_u16_type) = make_type(U16, 2u);
+    std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make_type(U32[1], 4u);
+    std::tie(shared_memory_u64, shared_u64, shared_memory_u64_type) = make_type(U64, 8u);
 }

 Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
@@ -235,17 +235,16 @@ public:
     Id false_value{};
     Id u8_one_value{};
     Id u8_zero_value{};
+    Id u16_zero_value{};
     Id u32_one_value{};
     Id u32_zero_value{};
     Id f32_zero_value{};
     Id u64_one_value{};
     Id u64_zero_value{};

-    Id shared_u8{};
     Id shared_u16{};
     Id shared_u32{};
-    Id shared_u32x2{};
-    Id shared_u32x4{};
     Id shared_u64{};

     Id input_u32{};
     Id input_f32{};
@@ -285,16 +284,16 @@ public:
     Id image_u32{};
     Id image_f32{};

-    Id shared_memory_u8{};
     Id shared_memory_u16{};
     Id shared_memory_u32{};
-    Id shared_memory_u32x2{};
-    Id shared_memory_u32x4{};
     Id shared_memory_u64{};

+    Id shared_memory_u16_type{};
     Id shared_memory_u32_type{};
+    Id shared_memory_u64_type{};

     Id interpolate_func{};
-    Id gl_bary_coord_id{};
+    Id bary_coord_persp_id{};
+    Id bary_coord_linear_id{};

     struct TextureDefinition {
         const VectorIds* data_types;
@@ -320,6 +319,7 @@ public:
         Id size;
         Id size_shorts;
         Id size_dwords;
+        Id size_qwords;
         std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;

         const BufferSpv& operator[](PointerType alias) const {
@@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {

     if (last_attr != IR::Attribute::Position0) {
         data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
+        const auto it = data.attr_map.begin();
+        const u32 comp_stride = std::next(it)->first - it->first;
+        data.output_vertices = comp_stride / 64;
     }

     return data;
@@ -3,8 +3,8 @@

 #pragma once

+#include <map>
 #include <span>
-#include <unordered_map>

 #include "common/types.h"
 #include "shader_recompiler/ir/attribute.h"
@@ -12,8 +12,9 @@
 namespace Shader {

 struct CopyShaderData {
-    std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
+    std::map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
     u32 num_attrs{0};
+    u32 output_vertices{0};
 };

 CopyShaderData ParseCopyShader(std::span<const u32> code);
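The std::unordered_map → std::map switch matters for the new output_vertices computation above: comp_stride is taken from the first two entries of attr_map, which is only meaningful when keys iterate in ascending order. A toy illustration:

#include <cassert>
#include <iterator>
#include <map>

int main() {
    // Keys iterate in ascending order with std::map, so the distance between
    // the first two entries is a well-defined stride; with std::unordered_map
    // the iteration order (and thus the stride) would be unspecified.
    std::map<unsigned, char> attr_map{{128, 'a'}, {0, 'b'}, {64, 'c'}};
    const auto it = attr_map.begin();
    const unsigned comp_stride = std::next(it)->first - it->first; // 64 - 0
    assert(comp_stride == 64);
    return 0;
}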
@@ -605,11 +605,12 @@ public:
          Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
           syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
-          runtime_info{runtime_info_}, profile{profile_} {
+          runtime_info{runtime_info_}, profile{profile_},
+          translator{info_, runtime_info_, profile_} {
         Visit(root_stmt, nullptr, nullptr);

-        IR::Block& first_block{*syntax_list.front().data.block};
-        Translator{&first_block, info, runtime_info, profile}.EmitPrologue();
+        IR::Block* first_block = syntax_list.front().data.block;
+        translator.EmitPrologue(first_block);
     }

 private:
@@ -637,8 +638,8 @@ private:
                 current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
                 const u32 start = stmt.block->begin_index;
                 const u32 size = stmt.block->end_index - start + 1;
-                Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
-                          info, runtime_info, profile);
+                translator.Translate(current_block, stmt.block->begin,
+                                     inst_list.subspan(start, size));
             }
             break;
         }
@@ -820,6 +821,7 @@ private:
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
+    Translator translator;
 };
 } // Anonymous namespace
@ -13,6 +13,8 @@ void Translator::EmitDataShare(const GcnInst& inst) {
    // DS
    case Opcode::DS_ADD_U32:
        return DS_ADD_U32(inst, false);
    case Opcode::DS_ADD_U64:
        return DS_ADD_U64(inst, false);
    case Opcode::DS_SUB_U32:
        return DS_SUB_U32(inst, false);
    case Opcode::DS_INC_U32:
@ -61,10 +63,14 @@ void Translator::EmitDataShare(const GcnInst& inst) {
        return DS_READ(32, false, true, false, inst);
    case Opcode::DS_READ2ST64_B32:
        return DS_READ(32, false, true, true, inst);
    case Opcode::DS_READ_U16:
        return DS_READ(16, false, false, false, inst);
    case Opcode::DS_CONSUME:
        return DS_CONSUME(inst);
    case Opcode::DS_APPEND:
        return DS_APPEND(inst);
    case Opcode::DS_WRITE_B16:
        return DS_WRITE(16, false, false, false, inst);
    case Opcode::DS_WRITE_B64:
        return DS_WRITE(64, false, false, false, inst);
    case Opcode::DS_WRITE2_B64:
@ -123,6 +129,18 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
    }
}

void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U64 data{GetSrc64(inst.src[1])};
    const IR::U32 offset =
        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
    if (rtn) {
        SetDst64(inst.dst[0], IR::U64{original_val});
    }
}

void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
@ -201,23 +219,28 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
                addr0);
            ir.WriteShared(64,
                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
                                                                 ir.GetVectorReg(data0 + 1))),
                           addr0);
        }
        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
        if (bit_size == 32) {
            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
        } else {
            ir.WriteShared(
                64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
                addr1);
            ir.WriteShared(64,
                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
                                                                 ir.GetVectorReg(data1 + 1))),
                           addr1);
        }
    } else if (bit_size == 64) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        const IR::Value data =
            ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
        ir.WriteShared(bit_size, data, addr0);
        ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
    } else if (bit_size == 16) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
    } else {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
@ -289,22 +312,29 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
        if (bit_size == 32) {
            ir.SetVectorReg(dst_reg++, IR::U32{data0});
        } else {
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
            const auto vector = ir.UnpackUint2x32(IR::U64{data0});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
        }
        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
        const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
        if (bit_size == 32) {
            ir.SetVectorReg(dst_reg++, IR::U32{data1});
        } else {
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
            const auto vector = ir.UnpackUint2x32(IR::U64{data1});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
        }
    } else if (bit_size == 64) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
        ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
        ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
        const auto vector = ir.UnpackUint2x32(IR::U64{data});
        ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
        ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
    } else if (bit_size == 16) {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)};
        ir.SetVectorReg(dst_reg, ir.UConvert(32, data));
    } else {
        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
        const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
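For readers unfamiliar with the DS encoding used in DS_ADD_U64 above: the instruction's 16-bit immediate offset is assembled from two 8-bit fields, offset1:offset0, before being added to the base address. A minimal standalone sketch of that address computation (plain C++; the function name is hypothetical):

    #include <cstdint>

    // Sketch: how a DS instruction's split offset fields form the final LDS address.
    // offset0/offset1 are the two 8-bit immediates from the instruction encoding.
    uint32_t DsAddress(uint32_t base_addr, uint8_t offset0, uint8_t offset1) {
        const uint32_t offset = (uint32_t(offset1) << 8u) + uint32_t(offset0);
        return base_addr + offset;
    }
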
@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
}

void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
    const u32 color_buffer_idx =
    u32 color_buffer_idx =
        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
        color_buffer_idx = 0;
    }
    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];

    AmdGpu::NumberFormat num_format;
@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
}

void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
    const u32 color_buffer_idx =
    u32 color_buffer_idx =
        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
        color_buffer_idx = 0;
    }
    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
    const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);

@ -114,6 +114,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
        return S_FF1_I32_B64(inst);
    case Opcode::S_FLBIT_I32_B32:
        return S_FLBIT_I32_B32(inst);
    case Opcode::S_FLBIT_I32_B64:
        return S_FLBIT_I32_B64(inst);
    case Opcode::S_BITSET0_B32:
        return S_BITSET_B32(inst, 0);
    case Opcode::S_BITSET1_B32:
@ -686,6 +688,17 @@ void Translator::S_FLBIT_I32_B32(const GcnInst& inst) {
    SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
}

void Translator::S_FLBIT_I32_B64(const GcnInst& inst) {
    const IR::U64 src0{GetSrc64(inst.src[0])};
    // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB)
    // position
    const IR::U32 msb_pos = ir.FindUMsb(src0);
    const IR::U32 pos_from_left = ir.ISub(ir.Imm32(63), msb_pos);
    // Select 0xFFFFFFFF if src0 was 0
    const IR::U1 cond = ir.INotEqual(src0, ir.Imm64(u64(0u)));
    SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
}

void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
    const IR::U32 old_value{GetSrc(inst.dst[0])};
    const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
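The conversion S_FLBIT_I32_B64 performs above (leftmost-bit position derived from an LSB-based find-MSB) is exactly a leading-zero count, which std::countl_zero produces directly on the host. A minimal sketch of the intended semantics, assuming IEEE-typical two's-complement integers:

    #include <bit>
    #include <cstdint>

    // Sketch of S_FLBIT_I32_B64 semantics: position of the first 1 bit counting
    // from the MSB side, or 0xFFFFFFFF when the source is zero.
    uint32_t FlbitB64(uint64_t src) {
        if (src == 0) {
            return ~0u;
        }
        // 63 - (LSB-based MSB index) == number of leading zeros.
        return uint32_t(std::countl_zero(src));
    }
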
@ -21,16 +21,60 @@

namespace Shader::Gcn {

static u32 next_vgpr_num;
static std::unordered_map<u32, IR::VectorReg> vgpr_map;

Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
                       const Profile& profile_)
    : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {
    next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
    : info{info_}, runtime_info{runtime_info_}, profile{profile_},
      next_vgpr_num{runtime_info.num_allocated_vgprs} {
    if (info.l_stage == LogicalStage::Fragment) {
        dst_frag_vreg = GatherInterpQualifiers();
    }
}

void Translator::EmitPrologue() {
IR::VectorReg Translator::GatherInterpQualifiers() {
    u32 dst_vreg{};
    if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
        info.has_perspective_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.persp_center_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
        info.has_perspective_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
        info.has_perspective_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
        ++dst_vreg; // I/W
        ++dst_vreg; // J/W
        ++dst_vreg; // 1/W
    }
    if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
        info.has_linear_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.linear_center_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
        info.has_linear_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
        vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
        info.has_linear_interp = true;
    }
    if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
        ++dst_vreg;
    }
    return IR::VectorReg(dst_vreg);
}

void Translator::EmitPrologue(IR::Block* first_block) {
    ir = IR::IREmitter(*first_block, first_block->begin());

    ir.Prologue();
    ir.SetExec(ir.Imm1(true));

@ -60,39 +104,7 @@ void Translator::EmitPrologue() {
        }
        break;
    case LogicalStage::Fragment:
        dst_vreg = IR::VectorReg::V0;
        if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_center_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
            ++dst_vreg; // I/W
            ++dst_vreg; // J/W
            ++dst_vreg; // 1/W
        }
        if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.linear_center_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
            ++dst_vreg;
        }
        dst_vreg = dst_frag_vreg;
        if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
        if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
            ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
@ -543,6 +555,26 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
    info.translation_failed = true;
}

void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list) {
    if (inst_list.empty()) {
        return;
    }
    ir = IR::IREmitter{*block, block->begin()};
    for (const auto& inst : inst_list) {
        pc += inst.length;

        // Special case for emitting fetch shader.
        if (inst.opcode == Opcode::S_SWAPPC_B64) {
            ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
                   info.stage == Stage::Local);
            EmitFetch(inst);
            continue;
        }

        TranslateInstruction(inst, pc);
    }
}

void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
    // Emit instructions for each category.
    switch (inst.category) {
@ -577,25 +609,4 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
    }
}

void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
               const RuntimeInfo& runtime_info, const Profile& profile) {
    if (inst_list.empty()) {
        return;
    }
    Translator translator{block, info, runtime_info, profile};
    for (const auto& inst : inst_list) {
        pc += inst.length;

        // Special case for emitting fetch shader.
        if (inst.opcode == Opcode::S_SWAPPC_B64) {
            ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
                   info.stage == Stage::Local);
            translator.EmitFetch(inst);
            continue;
        }

        translator.TranslateInstruction(inst, pc);
    }
}

} // namespace Shader::Gcn

@ -4,6 +4,7 @@
#pragma once

#include <span>
#include <unordered_map>
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
@ -53,15 +54,17 @@ enum class NegateMode : u32 {
    Result,
};

static constexpr size_t MaxInterpVgpr = 16;

class Translator {
public:
    explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
                        const Profile& profile);
    explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);

    void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list);
    void TranslateInstruction(const GcnInst& inst, u32 pc);

    // Instruction categories
    void EmitPrologue();
    void EmitPrologue(IR::Block* first_block);
    void EmitFetch(const GcnInst& inst);
    void EmitExport(const GcnInst& inst);
    void EmitFlowControl(u32 pc, const GcnInst& inst);
@ -121,6 +124,7 @@ public:
    void S_FF1_I32_B32(const GcnInst& inst);
    void S_FF1_I32_B64(const GcnInst& inst);
    void S_FLBIT_I32_B32(const GcnInst& inst);
    void S_FLBIT_I32_B64(const GcnInst& inst);
    void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
    void S_GETPC_B64(u32 pc, const GcnInst& inst);
    void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
@ -204,6 +208,7 @@ public:
    void V_EXP_F32(const GcnInst& inst);
    void V_LOG_F32(const GcnInst& inst);
    void V_RCP_F32(const GcnInst& inst);
    void V_RCP_LEGACY_F32(const GcnInst& inst);
    void V_RCP_F64(const GcnInst& inst);
    void V_RSQ_F32(const GcnInst& inst);
    void V_SQRT_F32(const GcnInst& inst);
@ -266,6 +271,7 @@ public:
    // Data share
    // DS
    void DS_ADD_U32(const GcnInst& inst, bool rtn);
    void DS_ADD_U64(const GcnInst& inst, bool rtn);
    void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
    void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
    void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
@ -324,16 +330,18 @@ private:
    void LogMissingOpcode(const GcnInst& inst);

    IR::VectorReg GetScratchVgpr(u32 offset);
    IR::VectorReg GatherInterpQualifiers();

private:
    IR::IREmitter ir;
    Info& info;
    const RuntimeInfo& runtime_info;
    const Profile& profile;
    u32 next_vgpr_num;
    std::unordered_map<u32, IR::VectorReg> vgpr_map;
    std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{};
    IR::VectorReg dst_frag_vreg{};
    bool opcode_missing = false;
};

void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
               const RuntimeInfo& runtime_info, const Profile& profile);

} // namespace Shader::Gcn

@ -158,6 +158,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
        return V_LOG_F32(inst);
    case Opcode::V_RCP_F32:
        return V_RCP_F32(inst);
    case Opcode::V_RCP_LEGACY_F32:
        return V_RCP_LEGACY_F32(inst);
    case Opcode::V_RCP_F64:
        return V_RCP_F64(inst);
    case Opcode::V_RCP_IFLAG_F32:
@ -798,6 +800,20 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPRecip(src0));
}

void Translator::V_RCP_LEGACY_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const auto result = ir.FPRecip(src0);
    const auto inf = ir.FPIsInf(result);

    const auto raw_result = ir.ConvertFToU(32, result);
    const auto sign_bit = ir.ShiftRightLogical(raw_result, ir.Imm32(31u));
    const auto sign_bit_set = ir.INotEqual(sign_bit, ir.Imm32(0u));
    const IR::F32 inf_result{ir.Select(sign_bit_set, ir.Imm32(-0.0f), ir.Imm32(0.0f))};
    const IR::F32 val{ir.Select(inf, inf_result, result)};

    SetDst(inst.dst[0], val);
}

void Translator::V_RCP_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPRecip(src0));
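The legacy reciprocal above differs from V_RCP_F32 only in flushing infinite results to signed zero. A minimal scalar sketch of the same behavior, assuming IEEE-754 floats on the host:

    #include <cmath>

    // Sketch of V_RCP_LEGACY_F32 semantics: 1/x, but +/-Inf results flush to +/-0.
    float RcpLegacy(float x) {
        const float r = 1.0f / x;
        if (std::isinf(r)) {
            return std::signbit(r) ? -0.0f : 0.0f;
        }
        return r;
    }
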
@ -22,13 +22,14 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
// VINTRP

void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
    auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
    const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
    info.interp_qualifiers[attr.param_index] = vgpr_to_interp[inst.src[0].code];
    const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
    SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
    auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
    const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
    const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
    SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
        return BUFFER_ATOMIC(AtomicOp::Add, inst);
    case Opcode::BUFFER_ATOMIC_SWAP:
        return BUFFER_ATOMIC(AtomicOp::Swap, inst);
    case Opcode::BUFFER_ATOMIC_CMPSWAP:
        return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
    case Opcode::BUFFER_ATOMIC_SMIN:
        return BUFFER_ATOMIC(AtomicOp::Smin, inst);
    case Opcode::BUFFER_ATOMIC_UMIN:
@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
    switch (op) {
    case AtomicOp::Swap:
        return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
    case AtomicOp::CmpSwap: {
        const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
        return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
    }
    case AtomicOp::Add:
        return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
    case AtomicOp::Smin:
@ -193,6 +193,8 @@ struct Info {
    PersistentSrtInfo srt_info;
    std::vector<u32> flattened_ud_buf;

    std::array<IR::Interpolation, 32> interp_qualifiers{};

    IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
    s32 tess_consts_dword_offset = -1;

@ -206,6 +208,8 @@ struct Info {
    bool has_discard{};
    bool has_image_gather{};
    bool has_image_query{};
    bool has_perspective_interp{};
    bool has_linear_interp{};
    bool uses_atomic_float_min_max{};
    bool uses_lane_id{};
    bool uses_group_quad{};
@ -83,6 +83,16 @@ enum class Attribute : u64 {
    Max,
};

enum class Interpolation {
    Invalid = 0,
    PerspectiveSample = 1,
    PerspectiveCenter = 2,
    PerspectiveCentroid = 3,
    LinearSample = 4,
    LinearCenter = 5,
    LinearCentroid = 6,
};

constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
@ -104,6 +114,15 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
    return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
}

constexpr bool IsLinear(Interpolation interp) noexcept {
    return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid;
}

constexpr bool IsPerspective(Interpolation interp) noexcept {
    return interp >= Interpolation::PerspectiveSample &&
           interp <= Interpolation::PerspectiveCentroid;
}

[[nodiscard]] std::string NameOf(Attribute attribute);

[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <array>
#include <bit>
#include <source_location>
#include <boost/container/small_vector.hpp>
#include "common/assert.h"
@ -294,10 +293,12 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {

Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
    switch (bit_size) {
    case 16:
        return Inst<U16>(Opcode::LoadSharedU16, offset);
    case 32:
        return Inst<U32>(Opcode::LoadSharedU32, offset);
    case 64:
        return Inst(Opcode::LoadSharedU64, offset);
        return Inst<U64>(Opcode::LoadSharedU64, offset);
    default:
        UNREACHABLE_MSG("Invalid bit size {}", bit_size);
    }
@ -305,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {

void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
    switch (bit_size) {
    case 16:
        Inst(Opcode::WriteSharedU16, offset, value);
        break;
    case 32:
        Inst(Opcode::WriteSharedU32, offset, value);
        break;
@ -316,10 +320,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
    }
}

U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
    switch (data.Type()) {
    case Type::U32:
        return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
    case Type::U64:
        return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
    default:
        ThrowInvalidType(data.Type());
    }
@ -513,6 +519,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
    return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
}

Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
                                     const Value& cmp_value, BufferInstInfo info) {
    return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
}

U32 IREmitter::DataAppend(const U32& counter) {
    return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
}
@ -1546,8 +1557,15 @@ U32 IREmitter::FindSMsb(const U32& value) {
    return Inst<U32>(Opcode::FindSMsb32, value);
}

U32 IREmitter::FindUMsb(const U32& value) {
    return Inst<U32>(Opcode::FindUMsb32, value);
U32 IREmitter::FindUMsb(const U32U64& value) {
    switch (value.Type()) {
    case Type::U32:
        return Inst<U32>(Opcode::FindUMsb32, value);
    case Type::U64:
        return Inst<U32>(Opcode::FindUMsb64, value);
    default:
        ThrowInvalidType(value.Type());
    }
}

U32 IREmitter::FindILsb(const U32U64& value) {

@ -6,7 +6,6 @@
#include <cstring>
#include <type_traits>

#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
@ -17,6 +16,7 @@ namespace Shader::IR {

class IREmitter {
public:
    explicit IREmitter() = default;
    explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
        : block{&block_}, insertion_point{insertion_point_} {}
@ -99,7 +99,7 @@ public:
    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
    void WriteShared(int bit_size, const Value& value, const U32& offset);

    [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
    [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
    [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
    [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
    [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
@ -150,6 +150,9 @@ public:
                                          const Value& value, BufferInstInfo info);
    [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
                                         const Value& value, BufferInstInfo info);
    [[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
                                            const Value& value, const Value& cmp_value,
                                            BufferInstInfo info);

    [[nodiscard]] U32 DataAppend(const U32& counter);
    [[nodiscard]] U32 DataConsume(const U32& counter);
@ -266,7 +269,7 @@ public:
    [[nodiscard]] U32 BitwiseNot(const U32& value);

    [[nodiscard]] U32 FindSMsb(const U32& value);
    [[nodiscard]] U32 FindUMsb(const U32& value);
    [[nodiscard]] U32 FindUMsb(const U32U64& value);
    [[nodiscard]] U32 FindILsb(const U32U64& value);
    [[nodiscard]] U32 SMin(const U32& a, const U32& b);
    [[nodiscard]] U32 UMin(const U32& a, const U32& b);

@ -30,13 +30,16 @@ OPCODE(EmitVertex, Void,
OPCODE(EmitPrimitive, Void, )

// Shared memory operations
OPCODE(LoadSharedU16, U16, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU64, U64, U32, )
OPCODE(WriteSharedU16, Void, U32, U16, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU64, Void, U32, U64, )

// Shared atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
@ -116,6 +119,7 @@ OPCODE(StoreBufferFormatF32, Void, Opaq

// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
@ -126,6 +130,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )

// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
@ -349,6 +354,7 @@ OPCODE(BitwiseNot32, U32, U32,

OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, )
OPCODE(FindUMsb64, U32, U64, )
OPCODE(FindILsb32, U32, U32, )
OPCODE(FindILsb64, U32, U64, )
OPCODE(SMin32, U32, U32, U32, )

@ -10,6 +10,8 @@
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "common/signal_context.h"
#include "core/signals.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/opcodes.h"
@ -24,6 +26,7 @@
using namespace Xbyak::util;

static Xbyak::CodeGenerator g_srt_codegen(32_MB);
static const u8* g_srt_codegen_start = nullptr;

namespace {

@ -54,6 +57,57 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
#endif
}

static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
    // Only handle if the fault address is within the SRT code range
    const u8* code_start = g_srt_codegen_start;
    const u8* code_end = code_start + g_srt_codegen.getSize();
    const void* code = Common::GetRip(context);
    if (code < code_start || code >= code_end) {
        return false; // Not in SRT code range
    }

    // Patch instruction to zero register
    ZydisDecodedInstruction instruction;
    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
    ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(instruction, operands,
                                                                       const_cast<void*>(code), 15);

    ASSERT(ZYAN_SUCCESS(status) && instruction.mnemonic == ZYDIS_MNEMONIC_MOV &&
           operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
           operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY);

    size_t len = instruction.length;
    const size_t patch_size = 3;
    u8* code_patch = const_cast<u8*>(reinterpret_cast<const u8*>(code));

    // We can only encounter rdi or r10d as the first operand in a
    // fault memory access for SRT walker.
    switch (operands[0].reg.value) {
    case ZYDIS_REGISTER_RDI:
        // mov rdi, [rdi + (off_dw << 2)] -> xor rdi, rdi
        code_patch[0] = 0x48;
        code_patch[1] = 0x31;
        code_patch[2] = 0xFF;
        break;
    case ZYDIS_REGISTER_R10D:
        // mov r10d, [rdi + (off_dw << 2)] -> xor r10d, r10d
        code_patch[0] = 0x45;
        code_patch[1] = 0x31;
        code_patch[2] = 0xD2;
        break;
    default:
        UNREACHABLE_MSG("Unsupported register for SRT walker patch");
        return false;
    }

    // Fill nops
    memset(code_patch + patch_size, 0x90, len - patch_size);

    LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);

    return true;
}

using namespace Shader;

struct PassInfo {
@ -141,6 +195,15 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
        return;
    }

    // Register the signal handler for SRT walker, if not already registered
    if (g_srt_codegen_start == nullptr) {
        g_srt_codegen_start = c.getCurr();
        auto* signals = Core::Signals::Instance();
        // Call after the memory invalidation handler
        constexpr u32 priority = 1;
        signals->RegisterAccessViolationHandler(SrtWalkerSignalHandler, priority);
    }

    info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();

    pass_info.dst_off_dw = NumUserDataRegs;

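The handler above neutralizes a faulting SRT-walker load by rewriting it in place: a 3-byte xor reg, reg clears the destination, and the rest of the original instruction is padded with single-byte nops so subsequent instructions still decode at the same offsets. A minimal sketch of that in-place patch, assuming writable code memory and a precomputed opcode triple (the function name is hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Sketch: replace an instruction of `len` bytes with a 3-byte `xor reg, reg`
    // followed by 0x90 (nop) padding, preserving the instruction stream layout.
    void PatchToZeroRegister(uint8_t* insn, size_t len, const uint8_t xor_bytes[3]) {
        std::memcpy(insn, xor_bytes, 3); // e.g. {0x48, 0x31, 0xFF} == xor rdi, rdi
        std::memset(insn + 3, 0x90, len - 3);
    }
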
@ -15,7 +15,7 @@ struct FormatInfo {
    AmdGpu::NumberFormat num_format;
    AmdGpu::CompMapping swizzle;
    AmdGpu::NumberConversion num_conversion;
    int num_components;
    u32 num_components;
};

static bool IsBufferFormatLoad(const IR::Inst& inst) {

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <unordered_map>
#include "shader_recompiler/ir/program.h"

namespace Shader::Optimization {

@ -39,11 +39,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
                ASSERT(addr->Arg(1).IsImmediate());
                offset = addr->Arg(1).U32();
            }
            IR::Value data = inst.Arg(1).Resolve();
            IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
                                          : inst.Arg(1).Resolve();
            for (s32 i = 0; i < num_components; i++) {
                const auto attrib = IR::Attribute::Param0 + (offset / 16);
                const auto comp = (offset / 4) % 4;
                const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
                const IR::U32 value =
                    IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
                ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
                offset += 4;
            }
@ -91,6 +93,19 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
        const auto& gs_info = runtime_info.gs_info;
        info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);

        u32 output_vertices = gs_info.output_vertices;
        if (info.gs_copy_data.output_vertices &&
            info.gs_copy_data.output_vertices != output_vertices) {
            ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices &&
                           gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG,
                       "Invalid geometry shader vertex configuration scenario = {}, max_vert_out = "
                       "{}, output_vertices = {}",
                       u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices);
            LOG_WARNING(Render_Vulkan, "MAX_VERT_OUT {} is larger than actual output vertices {}",
                        output_vertices, info.gs_copy_data.output_vertices);
            output_vertices = info.gs_copy_data.output_vertices;
        }

        ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
@ -122,7 +137,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim

                const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
                const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
                const auto comp_ofs = gs_info.output_vertices * 4u;
                const auto comp_ofs = output_vertices * 4u;
                const auto output_size = comp_ofs * gs_info.out_vertex_data_size;

                const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;

|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
info.uses_shared = true;
|
||||
|
@ -16,6 +16,7 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32:
|
||||
@ -33,9 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
if (program.info.stage != Stage::Compute) {
|
||||
return;
|
||||
}
|
||||
// Only perform the transform if the host shared memory is insufficient.
|
||||
// Only perform the transform if the host shared memory is insufficient
|
||||
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
|
||||
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||
if (shared_memory_size <= profile.max_shared_memory_size) {
|
||||
if (shared_memory_size <= profile.max_shared_memory_size &&
|
||||
profile.supports_workgroup_explicit_memory_layout) {
|
||||
return;
|
||||
}
|
||||
// Add buffer binding for shared memory storage buffer.
|
||||
@ -60,6 +63,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||
continue;
|
||||
@ -93,12 +97,19 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
ir.Imm32(shared_memory_size));
|
||||
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
ir.StoreBufferU16(handle, address, IR::U32{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
|
||||
inst.Invalidate();
|
||||
|
@ -7,7 +7,7 @@
#include "common/bit_field.h"
#include "common/enum.h"
#include "common/types.h"
#include "video_core/amdgpu/types.h"
#include "video_core/amdgpu/pixel_format.h"

namespace Shader::IR {

@ -23,13 +23,13 @@ struct Profile {
    bool support_fp32_denorm_preserve{};
    bool support_fp32_denorm_flush{};
    bool support_fp32_round_to_zero{};
    bool support_explicit_workgroup_layout{};
    bool support_legacy_vertex_attributes{};
    bool supports_image_load_store_lod{};
    bool supports_native_cube_calc{};
    bool supports_trinary_minmax{};
    bool supports_robust_buffer_access{};
    bool supports_image_fp32_atomic_min_max{};
    bool supports_workgroup_explicit_memory_layout{};
    bool has_broken_spirv_clamp{};
    bool lower_left_origin_mode{};
    bool needs_manual_interpolation{};

@ -149,6 +149,7 @@ struct GeometryRuntimeInfo {
    u32 out_vertex_data_size{};
    AmdGpu::PrimitiveType in_primitive;
    GsOutputPrimTypes out_primitive;
    AmdGpu::Liverpool::GsMode::Mode mode;
    std::span<const u32> vs_copy;
    u64 vs_copy_hash;

@ -196,11 +197,13 @@ struct FragmentRuntimeInfo {
    u32 num_inputs;
    std::array<PsInput, 32> inputs;
    std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
    bool dual_source_blending;

    bool operator==(const FragmentRuntimeInfo& other) const noexcept {
        return std::ranges::equal(color_buffers, other.color_buffers) &&
               en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
               num_inputs == other.num_inputs &&
               dual_source_blending == other.dual_source_blending &&
               std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
                                  other.inputs.begin() + num_inputs);
    }

@ -228,9 +228,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
        const u32 type = header->type;

        switch (type) {
        default:
            UNREACHABLE_MSG("Wrong PM4 type {}", type);
            break;
        case 0:
        case 1:
            UNREACHABLE_MSG("Unsupported PM4 type {}", type);
            UNREACHABLE_MSG("Unimplemented PM4 type 0, base reg: {}, size: {}",
                            header->type0.base.Value(), header->type0.NumWords());
            break;
        case 2:
            // Type-2 packet are used for padding purposes
@ -394,7 +397,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
            break;
        }
        case PM4ItOpcode::SetPredication: {
            LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
            LOG_WARNING(Render, "Unimplemented IT_SET_PREDICATION");
            break;
        }
        case PM4ItOpcode::IndexType: {
@ -586,8 +589,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
        }
        case PM4ItOpcode::EventWrite: {
            const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
            LOG_DEBUG(Render_Vulkan,
                      "Encountered EventWrite: event_type = {}, event_index = {}",
            LOG_DEBUG(Render, "Encountered EventWrite: event_type = {}, event_index = {}",
                      magic_enum::enum_name(event->event_type.Value()),
                      magic_enum::enum_name(event->event_index.Value()));
            if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
@ -673,6 +675,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
            }
            break;
        }
        case PM4ItOpcode::CopyData: {
            const auto* copy_data = reinterpret_cast<const PM4CmdCopyData*>(header);
            LOG_WARNING(Render,
                        "unhandled IT_COPY_DATA src_sel = {}, dst_sel = {}, "
                        "count_sel = {}, wr_confirm = {}, engine_sel = {}",
                        u32(copy_data->src_sel.Value()), u32(copy_data->dst_sel.Value()),
                        copy_data->count_sel.Value(), copy_data->wr_confirm.Value(),
                        u32(copy_data->engine_sel.Value()));
            break;
        }
        case PM4ItOpcode::MemSemaphore: {
            const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
            if (mem_semaphore->IsSignaling()) {
@ -756,6 +768,19 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
            LOG_WARNING(Render_Vulkan, "Unimplemented IT_GET_LOD_STATS");
            break;
        }
        case PM4ItOpcode::CondExec: {
            const auto* cond_exec = reinterpret_cast<const PM4CmdCondExec*>(header);
            if (cond_exec->command.Value() != 0) {
                LOG_WARNING(Render, "IT_COND_EXEC used a reserved command");
            }
            const auto skip = *cond_exec->Address() == false;
            if (skip) {
                dcb = NextPacket(dcb,
                                 header->type3.NumWords() + 1 + cond_exec->exec_count.Value());
                continue;
            }
            break;
        }
        default:
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), count);
@ -804,6 +829,19 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
            break;
        }

        if (header->type == 2) {
            // Type-2 packet are used for padding purposes
            next_dw_off = 1;
            acb += next_dw_off;
            acb_dwords -= next_dw_off;

            if constexpr (!is_indirect) {
                *queue.read_addr += next_dw_off;
                *queue.read_addr %= queue.ring_size_dw;
            }
            continue;
        }

        if (header->type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value());
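The CondExec handler skips the predicated packet group by advancing the command stream past the header, its body, and the following exec_count dwords. A small sketch of that pointer arithmetic over a dword span (a hypothetical standalone version of the NextPacket helper the call above relies on):

    #include <cstddef>
    #include <cstdint>
    #include <span>

    // Sketch: advance a PM4 dword stream past `num_dw` dwords, as CondExec does
    // with header->type3.NumWords() + 1 + exec_count when the predicate reads false.
    std::span<const uint32_t> NextPacket(std::span<const uint32_t> stream, size_t num_dw) {
        return stream.subspan(num_dw);
    }
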
@ -914,7 +914,7 @@ struct Liverpool {
        }

        size_t GetColorSliceSize() const {
            const auto num_bytes_per_element = NumBits(info.format) / 8u;
            const auto num_bytes_per_element = NumBitsPerBlock(info.format) / 8u;
            const auto slice_size =
                num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
            return slice_size;
@ -1179,8 +1179,16 @@ struct Liverpool {
    };

    union GsMode {
        enum class Mode : u32 {
            Off = 0,
            ScenarioA = 1,
            ScenarioB = 2,
            ScenarioG = 3,
            ScenarioC = 4,
        };

        u32 raw;
        BitField<0, 3, u32> mode;
        BitField<0, 3, Mode> mode;
        BitField<3, 2, u32> cut_mode;
        BitField<22, 2, u32> onchip;
    };

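To make the slice-size formula above concrete: for an 8_8_8_8 color target NumBitsPerBlock returns 32 (4 bytes per element), so a single-sample slice covering tile_max + 1 tiles of 64 elements each works out as in this sketch (the example values are hypothetical):

    #include <cstddef>
    #include <cstdint>

    // Sketch of GetColorSliceSize: bytes_per_element * (tile_max + 1) * 64 * samples.
    // For Format8_8_8_8 (32 bits/block), tile_max = 1919 and 1 sample:
    // 4 * 1920 * 64 * 1 = 491520 bytes per slice.
    size_t ColorSliceSize(uint32_t bits_per_block, uint32_t tile_max, uint32_t samples) {
        const size_t bytes_per_element = bits_per_block / 8u;
        return bytes_per_element * (size_t(tile_max) + 1) * 64u * samples;
    }
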
@ -111,136 +111,106 @@ std::string_view NameOf(NumberFormat fmt) {
    }
}

int NumComponents(DataFormat format) {
    constexpr std::array num_components_per_element = {
        0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
        2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,
        -1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1};

    const u32 index = static_cast<u32>(format);
    if (index >= num_components_per_element.size()) {
        return 0;
    }
    return num_components_per_element[index];
}

int NumBits(DataFormat format) {
    const std::array num_bits_per_element = {
        0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, 16, 16, 16, 16, 32,
        32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 32, 4, 8, 8, 4, 8, 8, 8,
        -1, -1, 8, 8, 8, 8, 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1};

    const u32 index = static_cast<u32>(format);
    if (index >= num_bits_per_element.size()) {
        return 0;
    }
    return num_bits_per_element[index];
}

static constexpr std::array component_bits = {
    std::array{0, 0, 0, 0},     // 0 FormatInvalid
    std::array{8, 0, 0, 0},     // 1 Format8
    std::array{16, 0, 0, 0},    // 2 Format16
    std::array{8, 8, 0, 0},     // 3 Format8_8
    std::array{32, 0, 0, 0},    // 4 Format32
    std::array{16, 16, 0, 0},   // 5 Format16_16
    std::array{11, 11, 10, 0},  // 6 Format10_11_11
    std::array{10, 11, 11, 0},  // 7 Format11_11_10
    std::array{2, 10, 10, 10},  // 8 Format10_10_10_2
    std::array{10, 10, 10, 2},  // 9 Format2_10_10_10
    std::array{8, 8, 8, 8},     // 10 Format8_8_8_8
    std::array{32, 32, 0, 0},   // 11 Format32_32
    std::array{16, 16, 16, 16}, // 12 Format16_16_16_16
    std::array{32, 32, 32, 0},  // 13 Format32_32_32
    std::array{32, 32, 32, 32}, // 14 Format32_32_32_32
    std::array{0, 0, 0, 0},     // 15
    std::array{5, 6, 5, 0},     // 16 Format5_6_5
    std::array{5, 5, 5, 1},     // 17 Format1_5_5_5
    std::array{1, 5, 5, 5},     // 18 Format5_5_5_1
    std::array{4, 4, 4, 4},     // 19 Format4_4_4_4
    std::array{24, 8, 0, 0},    // 20 Format8_24
    std::array{8, 24, 0, 0},    // 21 Format24_8
    std::array{8, 24, 0, 0},    // 22 FormatX24_8_32
    std::array{0, 0, 0, 0},     // 23
    std::array{0, 0, 0, 0},     // 24
    std::array{0, 0, 0, 0},     // 25
    std::array{0, 0, 0, 0},     // 26
    std::array{0, 0, 0, 0},     // 27
    std::array{0, 0, 0, 0},     // 28
    std::array{0, 0, 0, 0},     // 29
    std::array{0, 0, 0, 0},     // 30
    std::array{0, 0, 0, 0},     // 31
    std::array{0, 0, 0, 0},     // 32 FormatGB_GR
    std::array{0, 0, 0, 0},     // 33 FormatBG_RG
    std::array{0, 0, 0, 0},     // 34 Format5_9_9_9
    std::array{0, 0, 0, 0},     // 35 FormatBc1
    std::array{0, 0, 0, 0},     // 36 FormatBc2
    std::array{0, 0, 0, 0},     // 37 FormatBc3
    std::array{0, 0, 0, 0},     // 38 FormatBc4
    std::array{0, 0, 0, 0},     // 39 FormatBc5
    std::array{0, 0, 0, 0},     // 40 FormatBc6
    std::array{0, 0, 0, 0},     // 41 FormatBc7
static constexpr std::array NUM_COMPONENTS = {
    0,  // 0 FormatInvalid
    1,  // 1 Format8
    1,  // 2 Format16
    2,  // 3 Format8_8
    1,  // 4 Format32
    2,  // 5 Format16_16
    3,  // 6 Format10_11_11
    3,  // 7 Format11_11_10
    4,  // 8 Format10_10_10_2
    4,  // 9 Format2_10_10_10
    4,  // 10 Format8_8_8_8
    2,  // 11 Format32_32
    4,  // 12 Format16_16_16_16
    3,  // 13 Format32_32_32
    4,  // 14 Format32_32_32_32
    0,  // 15
    3,  // 16 Format5_6_5
    4,  // 17 Format1_5_5_5
    4,  // 18 Format5_5_5_1
    4,  // 19 Format4_4_4_4
    2,  // 20 Format8_24
    2,  // 21 Format24_8
    2,  // 22 FormatX24_8_32
    0,  // 23
    0,  // 24
    0,  // 25
    0,  // 26
    0,  // 27
    0,  // 28
    0,  // 29
    0,  // 30
    0,  // 31
    3,  // 32 FormatGB_GR
    3,  // 33 FormatBG_RG
    4,  // 34 Format5_9_9_9
    4,  // 35 FormatBc1
    4,  // 36 FormatBc2
    4,  // 37 FormatBc3
    1,  // 38 FormatBc4
    2,  // 39 FormatBc5
    3,  // 40 FormatBc6
    4,  // 41 FormatBc7
};

u32 ComponentBits(DataFormat format, u32 comp) {
u32 NumComponents(DataFormat format) {
    const u32 index = static_cast<u32>(format);
    if (index >= component_bits.size() || comp >= 4) {
        return 0;
    }
    return component_bits[index][comp];
    ASSERT_MSG(index < NUM_COMPONENTS.size(), "Invalid data format = {}", format);
    return NUM_COMPONENTS[index];
}

static constexpr std::array component_offset = {
    std::array{-1, -1, -1, -1}, // 0 FormatInvalid
    std::array{0, -1, -1, -1},  // 1 Format8
    std::array{0, -1, -1, -1},  // 2 Format16
    std::array{0, 8, -1, -1},   // 3 Format8_8
    std::array{0, -1, -1, -1},  // 4 Format32
    std::array{0, 16, -1, -1},  // 5 Format16_16
    std::array{0, 11, 22, -1},  // 6 Format10_11_11
    std::array{0, 10, 21, -1},  // 7 Format11_11_10
    std::array{0, 2, 12, 22},   // 8 Format10_10_10_2
    std::array{0, 10, 20, 30},  // 9 Format2_10_10_10
    std::array{0, 8, 16, 24},   // 10 Format8_8_8_8
    std::array{0, 32, -1, -1},  // 11 Format32_32
    std::array{0, 16, 32, 48},  // 12 Format16_16_16_16
    std::array{0, 32, 64, -1},  // 13 Format32_32_32
    std::array{0, 32, 64, 96},  // 14 Format32_32_32_32
    std::array{-1, -1, -1, -1}, // 15
    std::array{0, 5, 11, -1},   // 16 Format5_6_5
    std::array{0, 5, 10, 15},   // 17 Format1_5_5_5
    std::array{0, 1, 6, 11},    // 18 Format5_5_5_1
    std::array{0, 4, 8, 12},    // 19 Format4_4_4_4
    std::array{0, 24, -1, -1},  // 20 Format8_24
    std::array{0, 8, -1, -1},   // 21 Format24_8
    std::array{0, 8, -1, -1},   // 22 FormatX24_8_32
    std::array{-1, -1, -1, -1}, // 23
    std::array{-1, -1, -1, -1}, // 24
    std::array{-1, -1, -1, -1}, // 25
    std::array{-1, -1, -1, -1}, // 26
    std::array{-1, -1, -1, -1}, // 27
    std::array{-1, -1, -1, -1}, // 28
    std::array{-1, -1, -1, -1}, // 29
    std::array{-1, -1, -1, -1}, // 30
    std::array{-1, -1, -1, -1}, // 31
    std::array{-1, -1, -1, -1}, // 32 FormatGB_GR
    std::array{-1, -1, -1, -1}, // 33 FormatBG_RG
    std::array{-1, -1, -1, -1}, // 34 Format5_9_9_9
    std::array{-1, -1, -1, -1}, // 35 FormatBc1
    std::array{-1, -1, -1, -1}, // 36 FormatBc2
    std::array{-1, -1, -1, -1}, // 37 FormatBc3
    std::array{-1, -1, -1, -1}, // 38 FormatBc4
    std::array{-1, -1, -1, -1}, // 39 FormatBc5
    std::array{-1, -1, -1, -1}, // 40 FormatBc6
    std::array{-1, -1, -1, -1}, // 41 FormatBc7
static constexpr std::array BITS_PER_BLOCK = {
    0,   // 0 FormatInvalid
    8,   // 1 Format8
    16,  // 2 Format16
    16,  // 3 Format8_8
    32,  // 4 Format32
    32,  // 5 Format16_16
    32,  // 6 Format10_11_11
    32,  // 7 Format11_11_10
    32,  // 8 Format10_10_10_2
    32,  // 9 Format2_10_10_10
    32,  // 10 Format8_8_8_8
    64,  // 11 Format32_32
    64,  // 12 Format16_16_16_16
    96,  // 13 Format32_32_32
    128, // 14 Format32_32_32_32
    0,   // 15
    16,  // 16 Format5_6_5
    16,  // 17 Format1_5_5_5
    16,  // 18 Format5_5_5_1
    16,  // 19 Format4_4_4_4
    32,  // 20 Format8_24
    32,  // 21 Format24_8
    64,  // 22 FormatX24_8_32
    0,   // 23
    0,   // 24
    0,   // 25
    0,   // 26
    0,   // 27
    0,   // 28
    0,   // 29
    0,   // 30
    0,   // 31
    16,  // 32 FormatGB_GR
    16,  // 33 FormatBG_RG
    32,  // 34 Format5_9_9_9
    64,  // 35 FormatBc1
    128, // 36 FormatBc2
    128, // 37 FormatBc3
    64,  // 38 FormatBc4
    128, // 39 FormatBc5
    128, // 40 FormatBc6
    128, // 41 FormatBc7
};

s32 ComponentOffset(DataFormat format, u32 comp) {
u32 NumBitsPerBlock(DataFormat format) {
    const u32 index = static_cast<u32>(format);
    if (index >= component_offset.size() || comp >= 4) {
        return -1;
    }
    return component_offset[index][comp];
    ASSERT_MSG(index < BITS_PER_BLOCK.size(), "Invalid data format = {}", format);
    return BITS_PER_BLOCK[index];
}

} // namespace AmdGpu

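One point worth noting about the new tables: for the block-compressed formats the entry is bits per 4x4 block rather than per texel (hence 64 for Bc1/Bc4 and 128 for Bc2/Bc3/Bc5/Bc6/Bc7), so byte sizes derived from NumBitsPerBlock must be scaled by the block count, not the pixel count. A small sketch under that assumption (the function name is hypothetical):

    #include <cstdint>

    // Sketch: total bytes for a BC-compressed mip level, where bits_per_block
    // comes from NumBitsPerBlock() and blocks cover 4x4 texels.
    uint64_t CompressedImageSize(uint32_t width, uint32_t height, uint32_t bits_per_block) {
        const uint64_t blocks_x = (width + 3) / 4;
        const uint64_t blocks_y = (height + 3) / 4;
        return blocks_x * blocks_y * bits_per_block / 8;
    }
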
@ -5,39 +5,313 @@
|
||||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
enum NumberClass {
|
||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||
enum class DataFormat : u32 {
|
||||
FormatInvalid = 0,
|
||||
Format8 = 1,
|
||||
Format16 = 2,
|
||||
Format8_8 = 3,
|
||||
Format32 = 4,
|
||||
Format16_16 = 5,
|
||||
Format10_11_11 = 6,
|
||||
Format11_11_10 = 7,
|
||||
Format10_10_10_2 = 8,
|
||||
Format2_10_10_10 = 9,
|
||||
Format8_8_8_8 = 10,
|
||||
Format32_32 = 11,
|
||||
Format16_16_16_16 = 12,
|
||||
Format32_32_32 = 13,
|
||||
Format32_32_32_32 = 14,
|
||||
Format5_6_5 = 16,
|
||||
Format1_5_5_5 = 17,
|
||||
Format5_5_5_1 = 18,
|
||||
Format4_4_4_4 = 19,
|
||||
Format8_24 = 20,
|
||||
Format24_8 = 21,
|
||||
FormatX24_8_32 = 22,
|
||||
FormatGB_GR = 32,
|
||||
FormatBG_RG = 33,
|
||||
Format5_9_9_9 = 34,
|
||||
FormatBc1 = 35,
|
||||
FormatBc2 = 36,
|
||||
FormatBc3 = 37,
|
||||
FormatBc4 = 38,
|
||||
FormatBc5 = 39,
|
||||
FormatBc6 = 40,
|
||||
FormatBc7 = 41,
|
||||
FormatFmask8_1 = 47,
|
||||
FormatFmask8_2 = 48,
|
||||
FormatFmask8_4 = 49,
|
||||
FormatFmask16_1 = 50,
|
||||
FormatFmask16_2 = 51,
|
||||
FormatFmask32_2 = 52,
|
||||
FormatFmask32_4 = 53,
|
||||
FormatFmask32_8 = 54,
|
||||
FormatFmask64_4 = 55,
|
||||
FormatFmask64_8 = 56,
|
||||
Format4_4 = 57,
|
||||
Format6_5_5 = 58,
|
||||
Format1 = 59,
|
||||
Format1_Reversed = 60,
|
||||
Format32_As_8 = 61,
|
||||
Format32_As_8_8 = 62,
|
||||
Format32_As_32_32_32_32 = 63,
|
||||
};
|
||||
|
||||
enum class NumberFormat : u32 {
|
||||
Unorm = 0,
|
||||
Snorm = 1,
|
||||
Uscaled = 2,
|
||||
Sscaled = 3,
|
||||
Uint = 4,
|
||||
Sint = 5,
|
||||
SnormNz = 6,
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnormNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
enum class NumberClass {
|
||||
Float,
|
||||
Sint,
|
||||
Uint,
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
|
||||
switch (nfmt) {
|
||||
case NumberFormat::Sint:
|
||||
return Sint;
|
||||
case NumberFormat::Uint:
|
||||
return Uint;
|
||||
enum class CompSwizzle : u8 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
Green = 5,
|
||||
Blue = 6,
|
||||
Alpha = 7,
|
||||
};
|
||||
|
||||
enum class NumberConversion : u32 {
|
||||
None = 0,
|
||||
UintToUscaled = 1,
|
||||
SintToSscaled = 2,
|
||||
UnormToUbnorm = 3,
|
||||
Sint8ToSnormNz = 4,
|
||||
Sint16ToSnormNz = 5,
|
||||
Uint32ToUnorm = 6,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r;
|
||||
CompSwizzle g;
|
||||
CompSwizzle b;
|
||||
CompSwizzle a;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] CompMapping Inverse() const {
|
||||
CompMapping result{};
|
||||
InverseSingle(result.r, CompSwizzle::Red);
|
||||
InverseSingle(result.g, CompSwizzle::Green);
|
||||
InverseSingle(result.b, CompSwizzle::Blue);
|
||||
InverseSingle(result.a, CompSwizzle::Alpha);
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
|
||||
if (r == target) {
|
||||
dst = CompSwizzle::Red;
|
||||
} else if (g == target) {
|
||||
dst = CompSwizzle::Green;
|
||||
} else if (b == target) {
|
||||
dst = CompSwizzle::Blue;
|
||||
} else if (a == target) {
|
||||
dst = CompSwizzle::Alpha;
|
||||
} else {
|
||||
dst = CompSwizzle::Zero;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr CompMapping IdentityMapping = {
|
||||
.r = CompSwizzle::Red,
|
||||
.g = CompSwizzle::Green,
|
||||
.b = CompSwizzle::Blue,
|
||||
.a = CompSwizzle::Alpha,
|
||||
};

constexpr DataFormat RemapDataFormat(const DataFormat format) {
    switch (format) {
    case DataFormat::Format11_11_10:
        return DataFormat::Format10_11_11;
    case DataFormat::Format10_10_10_2:
        return DataFormat::Format2_10_10_10;
    case DataFormat::Format5_5_5_1:
        return DataFormat::Format1_5_5_5;
    default:
        return format;
    }
}

constexpr NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
    switch (format) {
    case NumberFormat::Unorm: {
        switch (data_format) {
        case DataFormat::Format32:
        case DataFormat::Format32_32:
        case DataFormat::Format32_32_32:
        case DataFormat::Format32_32_32_32:
            return NumberFormat::Uint;
        default:
            return format;
        }
    }
    case NumberFormat::Uscaled:
        return NumberFormat::Uint;
    case NumberFormat::Sscaled:
    case NumberFormat::SnormNz:
        return NumberFormat::Sint;
    case NumberFormat::Ubnorm:
        return NumberFormat::Unorm;
    case NumberFormat::Float:
        if (data_format == DataFormat::Format8) {
            // Games may ask for 8-bit float when they want to access the stencil component
            // of a depth-stencil image. Change to unsigned int to match the stencil format.
            // This is also the closest approximation to pass the bits through unconverted.
            return NumberFormat::Uint;
        }
        [[fallthrough]];
    default:
        return format;
    }
}

constexpr CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
    switch (format) {
    case DataFormat::Format1_5_5_5:
    case DataFormat::Format11_11_10: {
        CompMapping result;
        result.r = swizzle.b;
        result.g = swizzle.g;
        result.b = swizzle.r;
        result.a = swizzle.a;
        return result;
    }
    case DataFormat::Format10_10_10_2: {
        CompMapping result;
        result.r = swizzle.a;
        result.g = swizzle.b;
        result.b = swizzle.g;
        result.a = swizzle.r;
        return result;
    }
    case DataFormat::Format4_4_4_4: {
        // Remap to a more supported component order.
        CompMapping result;
        result.r = swizzle.g;
        result.g = swizzle.b;
        result.b = swizzle.a;
        result.a = swizzle.r;
        return result;
    }
    default:
        return swizzle;
    }
}

constexpr NumberConversion MapNumberConversion(const NumberFormat num_fmt,
                                               const DataFormat data_fmt) {
    switch (num_fmt) {
    case NumberFormat::Unorm: {
        switch (data_fmt) {
        case DataFormat::Format32:
        case DataFormat::Format32_32:
        case DataFormat::Format32_32_32:
        case DataFormat::Format32_32_32_32:
            return NumberConversion::Uint32ToUnorm;
        default:
            return NumberConversion::None;
        }
    }
    case NumberFormat::Uscaled:
        return NumberConversion::UintToUscaled;
    case NumberFormat::Sscaled:
        return NumberConversion::SintToSscaled;
    case NumberFormat::Ubnorm:
        return NumberConversion::UnormToUbnorm;
    case NumberFormat::SnormNz: {
        switch (data_fmt) {
        case DataFormat::Format8:
        case DataFormat::Format8_8:
        case DataFormat::Format8_8_8_8:
            return NumberConversion::Sint8ToSnormNz;
        case DataFormat::Format16:
        case DataFormat::Format16_16:
        case DataFormat::Format16_16_16_16:
            return NumberConversion::Sint16ToSnormNz;
        default:
            UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
        }
    }
    default:
        return NumberConversion::None;
    }
}
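
Taken together, the three Remap* helpers and MapNumberConversion describe one translation step: pick a host-supported data format, a matching number format, a compensating component order, and the conversion the shader must apply to undo the change. A hedged sketch of how a caller might combine them follows; the wrapper name and struct are illustrative, and whether RemapSwizzle receives the original or the remapped data format is an assumption here:

// Illustrative wrapper, not part of the original header.
struct TranslatedFormat {
    DataFormat data_fmt;
    NumberFormat num_fmt;
    CompMapping swizzle;
    NumberConversion conversion;
};

constexpr TranslatedFormat TranslateFormat(DataFormat dfmt, NumberFormat nfmt,
                                           CompMapping mapping) {
    return TranslatedFormat{
        .data_fmt = RemapDataFormat(dfmt),             // e.g. Format10_10_10_2 -> Format2_10_10_10
        .num_fmt = RemapNumberFormat(nfmt, dfmt),      // e.g. Uscaled -> Uint
        .swizzle = RemapSwizzle(dfmt, mapping),        // reorder components to match
        .conversion = MapNumberConversion(nfmt, dfmt), // conversion to apply in the shader
    };
}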

[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
    return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
}
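
A few compile-time checks (illustrative additions, not in the original header) confirm the classification behaves as the switches above intend:

// Illustrative static_asserts only.
static_assert(GetNumberClass(NumberFormat::Uint) == NumberClass::Uint);
static_assert(GetNumberClass(NumberFormat::Srgb) == NumberClass::Float);
static_assert(IsInteger(NumberFormat::Sint) && !IsInteger(NumberFormat::Unorm));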

[[nodiscard]] std::string_view NameOf(DataFormat fmt);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);

u32 ComponentBits(DataFormat format, u32 comp);
s32 ComponentOffset(DataFormat format, u32 comp);
u32 NumComponents(DataFormat format);
u32 NumBitsPerBlock(DataFormat format);

} // namespace AmdGpu

@ -554,6 +554,61 @@ struct PM4DmaData {
    }
};

enum class CopyDataSrc : u32 {
    MappedRegister = 0,
    Memory = 1,
    TCL2 = 2,
    Gds = 3,
    // Reserved = 4,
    Immediate = 5,
    Atomic = 6,
    GdsAtomic0 = 7,
    GdsAtomic1 = 8,
    GpuClock = 9,
};

enum class CopyDataDst : u32 {
    MappedRegister = 0,
    MemorySync = 1,
    TCL2 = 2,
    Gds = 3,
    // Reserved = 4,
    MemoryAsync = 5,
};

enum class CopyDataEngine : u32 {
    Me = 0,
    Pfp = 1,
    Ce = 2,
    // Reserved = 3
};

struct PM4CmdCopyData {
    PM4Type3Header header;
    union {
        BitField<0, 4, CopyDataSrc> src_sel;
        BitField<8, 4, CopyDataDst> dst_sel;
        BitField<16, 1, u32> count_sel;
        BitField<20, 1, u32> wr_confirm;
        BitField<30, 2, CopyDataEngine> engine_sel;
        u32 control;
    };
    u32 src_addr_lo;
    u32 src_addr_hi;
    u32 dst_addr_lo;
    u32 dst_addr_hi;

    template <typename T>
    T SrcAddress() const {
        return std::bit_cast<T>(src_addr_lo | u64(src_addr_hi) << 32);
    }

    template <typename T>
    T DstAddress() const {
        return std::bit_cast<T>(dst_addr_lo | u64(dst_addr_hi) << 32);
    }
};
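
For context, a hedged sketch of how a command-processor implementation might consume this packet. The handler name is hypothetical, and the count_sel semantics (0 = copy one DWORD, 1 = copy two) are an assumption from PM4 convention rather than something the source states:

// Illustrative handler, not part of the original header.
#include <cstring>

inline void HandleCopyData(const PM4CmdCopyData& packet) {
    if (packet.src_sel == CopyDataSrc::Memory && packet.dst_sel == CopyDataDst::MemoryAsync) {
        const u32* src = packet.SrcAddress<const u32*>();
        u32* dst = packet.DstAddress<u32*>();
        // Assumed: count_sel == 0 copies 32 bits, count_sel == 1 copies 64 bits.
        const u32 num_dwords = packet.count_sel ? 2 : 1;
        std::memcpy(dst, src, num_dwords * sizeof(u32));
    }
}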

struct PM4CmdRewind {
    PM4Type3Header header;
    union {

@ -1104,4 +1159,25 @@ struct PM4CmdMemSemaphore
    }
};

struct PM4CmdCondExec {
    PM4Type3Header header;
    union {
        BitField<2, 30, u32> bool_addr_lo; ///< low 32 address bits for the block in memory from
                                           ///< where the CP will fetch the condition
    };
    union {
        BitField<0, 16, u32> bool_addr_hi; ///< high address bits for the condition
        BitField<28, 4, u32> command;
    };
    union {
        BitField<0, 14, u32> exec_count; ///< Number of DWords that the CP will skip
                                         ///< if bool pointed to is zero
    };

    bool* Address() const {
        return std::bit_cast<bool*>(u64(bool_addr_hi.Value()) << 32 | u64(bool_addr_lo.Value())
                                                                          << 2);
    }
};
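
As a usage sketch (illustrative, not from the source): the CP reads the boolean at Address() and skips exec_count DWords of the command stream when it is zero. Note that bool_addr_lo stores bits [31:2] of the low word, which is why Address() shifts it left by two.

// Illustrative only.
inline u32 DwordsToSkip(const PM4CmdCondExec& cmd) {
    const bool* condition = cmd.Address();
    return *condition ? 0u : u32(cmd.exec_count.Value());
}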

} // namespace AmdGpu

@ -6,7 +6,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"

namespace AmdGpu {

@ -5,7 +5,6 @@

#include <string_view>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h"

namespace AmdGpu {

@ -114,281 +113,6 @@ enum class GsOutputPrimitiveType : u32 {
    TriangleStrip = 2,
};

} // namespace AmdGpu

template <>

@ -6,6 +6,7 @@
#include "common/debug.h"
#include "common/scope_exit.h"
#include "common/types.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/host_shaders/fault_buffer_process_comp.h"

@ -28,7 +29,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
                         TextureCache& texture_cache_, PageManager& tracker_)
    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
      texture_cache{texture_cache_}, tracker{tracker_},
      memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
      staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
      stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
      download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),

@ -293,7 +294,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {

void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
    if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
        memcpy(std::bit_cast<void*>(address), value, num_bytes);
        return;
    }

@ -365,7 +366,9 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
        return ObtainBuffer(gpu_addr, size, false, false);
    }
    // In all other cases, just do a CPU copy to the staging buffer.
    const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
    const auto [data, offset] = staging_buffer.Map(size, 16);
    memory->CopySparseMemory(gpu_addr, data, size);
    staging_buffer.Commit();
    return {&staging_buffer, offset};
}

@ -798,24 +801,45 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
}

bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
    static constexpr FindFlags find_flags =
        FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
    TextureCache::BaseDesc desc{};
    desc.info.guest_address = device_addr;
    desc.info.guest_size = size;
    const ImageId image_id = texture_cache.FindImage(desc, find_flags);
    if (!image_id) {
    boost::container::small_vector<ImageId, 6> image_ids;
    texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
        if (image.info.guest_address != device_addr) {
            return;
        }
        // Only perform sync if image is:
        // - GPU modified; otherwise there are no changes to synchronize.
        // - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
        // - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
        if (False(image.flags & ImageFlagBits::GpuModified) ||
            True(image.flags & ImageFlagBits::Dirty)) {
            return;
        }
        image_ids.push_back(image_id);
    });
    if (image_ids.empty()) {
        return false;
    }
    ImageId image_id{};
    if (image_ids.size() == 1) {
        // Sometimes the image size might not exactly match the requested buffer size.
        // If we only found one candidate image, use it without too many questions.
        image_id = image_ids[0];
    } else {
        for (s32 i = 0; i < image_ids.size(); ++i) {
            Image& image = texture_cache.GetImage(image_ids[i]);
            if (image.info.guest_size == size) {
                image_id = image_ids[i];
                break;
            }
        }
        if (!image_id) {
            LOG_WARNING(Render_Vulkan,
                        "Failed to find exact image match for copy addr={:#x}, size={:#x}",
                        device_addr, size);
            return false;
        }
    }
    Image& image = texture_cache.GetImage(image_id);
    // Only perform sync if image is:
    // - GPU modified; otherwise there are no changes to synchronize.
    // - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
    // - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
    if (False(image.flags & ImageFlagBits::GpuModified) ||
        True(image.flags & ImageFlagBits::Dirty)) {
        return false;
    }
    ASSERT_MSG(device_addr == image.info.guest_address,
               "Texel buffer aliases image subresources {:x} : {:x}", device_addr,
               image.info.guest_address);

@ -17,6 +17,10 @@ namespace AmdGpu {
struct Liverpool;
}

namespace Core {
class MemoryManager;
}

namespace Shader {
namespace Gcn {
struct FetchShaderData;

@ -183,6 +187,7 @@ private:
    Vulkan::Scheduler& scheduler;
    Vulkan::Rasterizer& rasterizer;
    AmdGpu::Liverpool* liverpool;
    Core::MemoryManager* memory;
    TextureCache& texture_cache;
    PageManager& tracker;
    StreamBuffer staging_buffer;

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
    uint num_levels;
    uint pitch;
    uint height;
    uint sizes[14];
    uint sizes[16];
} info;

// Inverse morton LUT, small enough to fit into K$

@ -18,7 +18,7 @@ layout(push_constant) uniform image_info {
    uint num_levels;
    uint pitch;
    uint height;
    uint sizes[14];
    uint sizes[16];
} info;

#define MICRO_TILE_DIM 8

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
    uint num_levels;
    uint pitch;
    uint height;
    uint sizes[14];
    uint sizes[16];
} info;

// Inverse morton LUT, small enough to fit into K$

@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
    uint num_levels;
    uint pitch;
    uint height;
    uint sizes[14];
    uint sizes[16];
} info;

// Inverse morton LUT, small enough to fit into K$

@ -19,7 +19,7 @@ layout(push_constant) uniform image_info {
    uint num_levels;
    uint pitch;
    uint height;
    uint sizes[14];
    uint sizes[16];
} info;

#define MICRO_TILE_DIM 8

@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
    }
}

bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
    using BlendFactor = Liverpool::BlendControl::BlendFactor;
    switch (factor) {
    case BlendFactor::Src1Color:
    case BlendFactor::Src1Alpha:
    case BlendFactor::InvSrc1Color:
    case BlendFactor::InvSrc1Alpha:
        return true;
    default:
        return false;
    }
}
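
This helper matters because the Src1* factors read from the second color output (MRT1), which Vulkan only allows when the pipeline and fragment shader are built for dual-source blending. A hedged sketch of the per-equation check the pipeline cache hunk further down performs (the wrapper name is illustrative):

// Illustrative only; mirrors the fs_info.dual_source_blending logic below.
inline bool BlendNeedsDualSource(Liverpool::BlendControl::BlendFactor src,
                                 Liverpool::BlendControl::BlendFactor dst) {
    return IsDualSourceBlendFactor(src) || IsDualSourceBlendFactor(dst);
}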

vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
    using BlendFunc = Liverpool::BlendControl::BlendFunc;
    switch (func) {

@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);

vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);

bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);

vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);

vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);

@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
        vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
        vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
        vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
        vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
        vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
        vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
    features = feature_chain.get().features;

    const vk::StructureChain properties_chain = physical_device.getProperties2<

@ -283,6 +284,20 @@ bool Instance::CreateDevice() {
        LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
                 shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
    }
    workgroup_memory_explicit_layout =
        add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
    if (workgroup_memory_explicit_layout) {
        workgroup_memory_explicit_layout_features =
            feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
        LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
                 workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
        LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
                 workgroup_memory_explicit_layout_features
                     .workgroupMemoryExplicitLayoutScalarBlockLayout);
        LOG_INFO(
            Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
            workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
    }
    const bool calibrated_timestamps =
        TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;

@ -420,6 +435,15 @@ bool Instance::CreateDevice() {
            .shaderImageFloat32AtomicMinMax =
                shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
        },
        vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
            .workgroupMemoryExplicitLayout =
                workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
            .workgroupMemoryExplicitLayoutScalarBlockLayout =
                workgroup_memory_explicit_layout_features
                    .workgroupMemoryExplicitLayoutScalarBlockLayout,
            .workgroupMemoryExplicitLayout16BitAccess =
                workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
        },
#ifdef __APPLE__
        portability_features,
#endif

@ -452,6 +476,9 @@ bool Instance::CreateDevice() {
    if (!shader_atomic_float2) {
        device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
    }
    if (!workgroup_memory_explicit_layout) {
        device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
    }

    auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
    if (device_result != vk::Result::eSuccess) {

@ -171,6 +171,12 @@ public:
        return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
    }

    /// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
    bool IsWorkgroupMemoryExplicitLayoutSupported() const {
        return workgroup_memory_explicit_layout &&
               workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
    }

    /// Returns true when geometry shaders are supported by the device
    bool IsGeometryStageSupported() const {
        return features.geometryShader;

@ -349,6 +355,8 @@ private:
    vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
    vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
    vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
    vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
        workgroup_memory_explicit_layout_features;
    vk::DriverIdKHR driver_id;
    vk::UniqueDebugUtilsMessengerEXT debug_callback{};
    std::string vendor_name;

@ -374,6 +382,7 @@ private:
    bool amd_gcn_shader{};
    bool amd_shader_trinary_minmax{};
    bool shader_atomic_float2{};
    bool workgroup_memory_explicit_layout{};
    bool portability_subset{};
};

@ -146,6 +146,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
        }
        gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
        gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
        gs_info.mode = regs.vgt_gs_mode.mode;
        const auto params_vc = Liverpool::GetParams(regs.vs_program);
        gs_info.vs_copy = params_vc.code;
        gs_info.vs_copy_hash = params_vc.hash;

@ -158,6 +159,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
        info.fs_info.addr_flags = regs.ps_input_addr;
        const auto& ps_inputs = regs.ps_inputs;
        info.fs_info.num_inputs = regs.num_interp;
        const auto& cb0_blend = regs.blend_control[0];
        info.fs_info.dual_source_blending =
            LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
            LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
        if (cb0_blend.separate_alpha_blend) {
            info.fs_info.dual_source_blending |=
                LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
                LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
        }
        for (u32 i = 0; i < regs.num_interp; i++) {
            info.fs_info.inputs[i] = {
                .param_index = u8(ps_inputs[i].input_offset.Value()),

@ -200,7 +210,6 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
        .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
        .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
        .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
        .support_explicit_workgroup_layout = true,
        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
        .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
        .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),

@ -208,6 +217,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
        // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
        .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
        .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
        .supports_workgroup_explicit_memory_layout =
            instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
        .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                      instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
        .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||

220
src/video_core/texture_cache/host_compatibility.cpp
Normal file

@ -0,0 +1,220 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2015-2023 The Khronos Group Inc.
// Copyright © 2015-2023 Valve Corporation
// Copyright © 2015-2023 LunarG, Inc.

#include <unordered_map>
#include "common/enum.h"
#include "video_core/texture_cache/host_compatibility.h"

namespace VideoCore {

/**
 * @brief All classes of format compatibility according to the Vulkan specification
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.h#L47-L131
 */
enum class CompatibilityClass {
    NONE = 0,
    _128BIT = 1 << 0,
    _16BIT = 1 << 1,
    _192BIT = 1 << 2,
    _24BIT = 1 << 3,
    _256BIT = 1 << 4,
    _32BIT = 1 << 5,
    _48BIT = 1 << 6,
    _64BIT = 1 << 7,
    _8BIT = 1 << 8,
    _96BIT = 1 << 9,
    BC1_RGB = 1 << 10,
    BC1_RGBA = 1 << 11,
    BC2 = 1 << 12,
    BC3 = 1 << 13,
    BC4 = 1 << 14,
    BC5 = 1 << 15,
    BC6H = 1 << 16,
    BC7 = 1 << 17,
    D16 = 1 << 18,
    D16S8 = 1 << 19,
    D24 = 1 << 20,
    D24S8 = 1 << 21,
    D32 = 1 << 22,
    D32S8 = 1 << 23,
    S8 = 1 << 24,
};
DECLARE_ENUM_FLAG_OPERATORS(CompatibilityClass)

/**
 * @brief The format compatibility class according to the Vulkan specification
 * @url
 * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.cpp#L70-L812
 */
static const std::unordered_map<vk::Format, CompatibilityClass> FORMAT_TABLE = {
    {vk::Format::eA1R5G5B5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA2B10G10R10SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2B10G10R10UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA2R10G10B10UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA4B4G4R4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA4R4G4B4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eA8B8G8R8SintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SrgbPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8SscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UintPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UnormPack32, CompatibilityClass::_32BIT},
    {vk::Format::eA8B8G8R8UscaledPack32, CompatibilityClass::_32BIT},
    {vk::Format::eB10G11R11UfloatPack32, CompatibilityClass::_32BIT},
    {vk::Format::eB4G4R4A4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB5G5R5A1UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB5G6R5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eB8G8R8A8Sint, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Srgb, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Uint, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8A8Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eB8G8R8Sint, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Snorm, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Srgb, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Sscaled, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Uint, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Unorm, CompatibilityClass::_24BIT},
    {vk::Format::eB8G8R8Uscaled, CompatibilityClass::_24BIT},
    {vk::Format::eBc1RgbaSrgbBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbaUnormBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbSrgbBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
    {vk::Format::eBc1RgbUnormBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
    {vk::Format::eBc2SrgbBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
    {vk::Format::eBc2UnormBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
    {vk::Format::eBc3SrgbBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
    {vk::Format::eBc3UnormBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
    {vk::Format::eBc4SnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
    {vk::Format::eBc4UnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
    {vk::Format::eBc5SnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
    {vk::Format::eBc5UnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
    {vk::Format::eBc6HSfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
    {vk::Format::eBc6HUfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
    {vk::Format::eBc7SrgbBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
    {vk::Format::eBc7UnormBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
    {vk::Format::eD16Unorm, CompatibilityClass::D16},
    {vk::Format::eD16UnormS8Uint, CompatibilityClass::D16S8},
    {vk::Format::eD24UnormS8Uint, CompatibilityClass::D24S8},
    {vk::Format::eD32Sfloat, CompatibilityClass::D32},
    {vk::Format::eD32SfloatS8Uint, CompatibilityClass::D32S8},
    {vk::Format::eE5B9G9R9UfloatPack32, CompatibilityClass::_32BIT},
    {vk::Format::eR10X6G10X6Unorm2Pack16, CompatibilityClass::_32BIT},
    {vk::Format::eR10X6UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR12X4G12X4Unorm2Pack16, CompatibilityClass::_32BIT},
    {vk::Format::eR12X4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR16G16B16A16Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Snorm, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Sscaled, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Unorm, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16A16Uscaled, CompatibilityClass::_64BIT},
    {vk::Format::eR16G16B16Sfloat, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Sint, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Snorm, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Sscaled, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Uint, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Unorm, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16B16Uscaled, CompatibilityClass::_48BIT},
    {vk::Format::eR16G16Sfloat, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eR16G16Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR16Sfloat, CompatibilityClass::_16BIT},
    {vk::Format::eR16Sint, CompatibilityClass::_16BIT},
    {vk::Format::eR16Snorm, CompatibilityClass::_16BIT},
    {vk::Format::eR16Sscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR16Uint, CompatibilityClass::_16BIT},
    {vk::Format::eR16Unorm, CompatibilityClass::_16BIT},
    {vk::Format::eR16Uscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR32G32B32A32Sfloat, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32A32Sint, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32A32Uint, CompatibilityClass::_128BIT},
    {vk::Format::eR32G32B32Sfloat, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32B32Sint, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32B32Uint, CompatibilityClass::_96BIT},
    {vk::Format::eR32G32Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR32G32Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR32G32Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR32Sfloat, CompatibilityClass::_32BIT},
    {vk::Format::eR32Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR32Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR4G4B4A4UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR4G4UnormPack8, CompatibilityClass::_8BIT},
    {vk::Format::eR5G5B5A1UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR5G6B5UnormPack16, CompatibilityClass::_16BIT},
    {vk::Format::eR64G64B64A64Sfloat, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64A64Sint, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64A64Uint, CompatibilityClass::_256BIT},
    {vk::Format::eR64G64B64Sfloat, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64B64Sint, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64B64Uint, CompatibilityClass::_192BIT},
    {vk::Format::eR64G64Sfloat, CompatibilityClass::_128BIT},
    {vk::Format::eR64G64Sint, CompatibilityClass::_128BIT},
    {vk::Format::eR64G64Uint, CompatibilityClass::_128BIT},
    {vk::Format::eR64Sfloat, CompatibilityClass::_64BIT},
    {vk::Format::eR64Sint, CompatibilityClass::_64BIT},
    {vk::Format::eR64Uint, CompatibilityClass::_64BIT},
    {vk::Format::eR8G8B8A8Sint, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Snorm, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Srgb, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Sscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Uint, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Unorm, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8A8Uscaled, CompatibilityClass::_32BIT},
    {vk::Format::eR8G8B8Sint, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Snorm, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Srgb, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Sscaled, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Uint, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Unorm, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8B8Uscaled, CompatibilityClass::_24BIT},
    {vk::Format::eR8G8Sint, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Snorm, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Srgb, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Sscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Uint, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Unorm, CompatibilityClass::_16BIT},
    {vk::Format::eR8G8Uscaled, CompatibilityClass::_16BIT},
    {vk::Format::eR8Sint, CompatibilityClass::_8BIT},
    {vk::Format::eR8Snorm, CompatibilityClass::_8BIT},
    {vk::Format::eR8Srgb, CompatibilityClass::_8BIT},
    {vk::Format::eR8Sscaled, CompatibilityClass::_8BIT},
    {vk::Format::eR8Uint, CompatibilityClass::_8BIT},
    {vk::Format::eR8Unorm, CompatibilityClass::_8BIT},
    {vk::Format::eR8Uscaled, CompatibilityClass::_8BIT},
    {vk::Format::eS8Uint, CompatibilityClass::S8},
    {vk::Format::eX8D24UnormPack32, CompatibilityClass::D24},
    {vk::Format::eUndefined, CompatibilityClass::NONE},
};

bool IsVulkanFormatCompatible(vk::Format base, vk::Format view) {
    if (base == view) {
        return true;
    }
    const auto base_comp = FORMAT_TABLE.at(base);
    const auto view_comp = FORMAT_TABLE.at(view);
    return (base_comp & view_comp) == view_comp;
}

} // namespace VideoCore
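
Two illustrative checks of the compatibility rule (additions for this document, assuming the table above; note FORMAT_TABLE.at throws for formats it does not list): BC1 RGBA UNORM and sRGB share a class and may alias as views, while a 128-bit format cannot be viewed as a 32-bit one.

// Illustrative only.
#include <cassert>

inline void FormatCompatibilityExamples() {
    assert(VideoCore::IsVulkanFormatCompatible(vk::Format::eBc1RgbaUnormBlock,
                                               vk::Format::eBc1RgbaSrgbBlock));
    assert(!VideoCore::IsVulkanFormatCompatible(vk::Format::eR32G32B32A32Sfloat,
                                                vk::Format::eR32Sfloat));
}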

@ -6,387 +6,11 @@

#pragma once

#include <unordered_map>
#include "video_core/renderer_vulkan/vk_common.h"

namespace VideoCore {
/**
 * @brief All classes of format compatibility according to the Vulkan specification
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.h#L47-L131
 * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
 * conventions
 */
enum class FORMAT_COMPATIBILITY_CLASS {
    NONE = 0,
    _10BIT_2PLANE_420,
    _10BIT_2PLANE_422,
    _10BIT_2PLANE_444,
    _10BIT_3PLANE_420,
    _10BIT_3PLANE_422,
    _10BIT_3PLANE_444,
    _12BIT_2PLANE_420,
    _12BIT_2PLANE_422,
    _12BIT_2PLANE_444,
    _12BIT_3PLANE_420,
    _12BIT_3PLANE_422,
    _12BIT_3PLANE_444,
    _128BIT,
    _16BIT,
    _16BIT_2PLANE_420,
    _16BIT_2PLANE_422,
    _16BIT_2PLANE_444,
    _16BIT_3PLANE_420,
    _16BIT_3PLANE_422,
    _16BIT_3PLANE_444,
    _192BIT,
    _24BIT,
    _256BIT,
    _32BIT,
    _32BIT_B8G8R8G8,
    _32BIT_G8B8G8R8,
    _48BIT,
    _64BIT,
    _64BIT_B10G10R10G10,
    _64BIT_B12G12R12G12,
    _64BIT_B16G16R16G16,
    _64BIT_G10B10G10R10,
    _64BIT_G12B12G12R12,
    _64BIT_G16B16G16R16,
    _64BIT_R10G10B10A10,
    _64BIT_R12G12B12A12,
    _8BIT,
    _8BIT_2PLANE_420,
    _8BIT_2PLANE_422,
    _8BIT_2PLANE_444,
    _8BIT_3PLANE_420,
    _8BIT_3PLANE_422,
    _8BIT_3PLANE_444,
    _96BIT,
    ASTC_10X10,
    ASTC_10X5,
    ASTC_10X6,
    ASTC_10X8,
    ASTC_12X10,
    ASTC_12X12,
    ASTC_4X4,
    ASTC_5X4,
    ASTC_5X5,
    ASTC_6X5,
    ASTC_6X6,
    ASTC_8X5,
    ASTC_8X6,
    ASTC_8X8,
    BC1_RGB,
    BC1_RGBA,
    BC2,
    BC3,
    BC4,
    BC5,
    BC6H,
    BC7,
    D16,
    D16S8,
    D24,
    D24S8,
    D32,
    D32S8,
    EAC_R,
    EAC_RG,
    ETC2_EAC_RGBA,
    ETC2_RGB,
    ETC2_RGBA,
    PVRTC1_2BPP,
    PVRTC1_4BPP,
    PVRTC2_2BPP,
    PVRTC2_4BPP,
    S8
};

/**
 * @brief The format compatibility class according to the Vulkan specification
 * @url
 * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.cpp#L70-L812
 * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
 * conventions
 */
static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatClassTable{
    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A2B10G10R10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2B10G10R10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A2R10G10B10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_A8B8G8R8_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SRGB_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_A8B8G8R8_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
    {VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
    {VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
    {VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_10x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_10x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
    {VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
    {VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_12x12_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_12x12_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
    {VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_4x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_4x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
    {VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
    {VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_5x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_5x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
    {VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
    {VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_6x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_6x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
    {VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
    {VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
    {VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_ASTC_8x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_ASTC_8x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
    {VK_FORMAT_B10G11R11_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_B10G10R10G10},
    {VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_B12G12R12G12},
    {VK_FORMAT_B16G16R16G16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_B16G16R16G16},
    {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B5G6R5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_B8G8R8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_B8G8R8G8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_B8G8R8G8},
    {VK_FORMAT_B8G8R8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_B8G8R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
    {VK_FORMAT_BC1_RGBA_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
    {VK_FORMAT_BC1_RGBA_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
    {VK_FORMAT_BC1_RGB_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
    {VK_FORMAT_BC1_RGB_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
    {VK_FORMAT_BC2_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
    {VK_FORMAT_BC2_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
    {VK_FORMAT_BC3_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
    {VK_FORMAT_BC3_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
    {VK_FORMAT_BC4_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
    {VK_FORMAT_BC4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
    {VK_FORMAT_BC5_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
    {VK_FORMAT_BC5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
    {VK_FORMAT_BC6H_SFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
    {VK_FORMAT_BC6H_UFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
    {VK_FORMAT_BC7_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
    {VK_FORMAT_BC7_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
    {VK_FORMAT_D16_UNORM, FORMAT_COMPATIBILITY_CLASS::D16},
    {VK_FORMAT_D16_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D16S8},
    {VK_FORMAT_D24_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D24S8},
    {VK_FORMAT_D32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::D32},
    {VK_FORMAT_D32_SFLOAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D32S8},
    {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_EAC_R11G11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
    {VK_FORMAT_EAC_R11G11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
    {VK_FORMAT_EAC_R11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
    {VK_FORMAT_EAC_R11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
    {VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
    {VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
    {VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
    {VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
    {VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_G10B10G10R10},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_420},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_422},
    {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_444},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_420},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_422},
    {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_444},
    {VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
     FORMAT_COMPATIBILITY_CLASS::_64BIT_G12B12G12R12},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_420},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_422},
    {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_444},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_420},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_422},
    {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
     FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_444},
    {VK_FORMAT_G16B16G16R16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_G16B16G16R16},
    {VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_420},
    {VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_422},
    {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_444},
    {VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_420},
    {VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_422},
    {VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_444},
    {VK_FORMAT_G8B8G8R8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_G8B8G8R8},
    {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_420},
    {VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_422},
    {VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_444},
    {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_420},
    {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_422},
    {VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_444},
    {VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
    {VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
    {VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
    {VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
    {VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
    {VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
    {VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
    {VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
    {VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R10G10B10A10},
    {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R10X6_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R12G12B12A12},
    {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R12X4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
    {VK_FORMAT_R16G16B16A16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16A16_USCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
    {VK_FORMAT_R16G16B16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_UINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_UNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16B16_USCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
    {VK_FORMAT_R16G16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
    {VK_FORMAT_R16G16_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R16G16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R16G16_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R16G16_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R16G16_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R16_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R32G32B32A32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R32G32B32A32_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R32G32B32A32_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R32G32B32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
||||
{VK_FORMAT_R32G32B32_SINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
||||
{VK_FORMAT_R32G32B32_UINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
||||
{VK_FORMAT_R32G32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R32G32_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R32G32_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R32_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R32_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R4G4_UNORM_PACK8, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R5G5B5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R5G6B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R64G64B64A64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
||||
{VK_FORMAT_R64G64B64A64_SINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
||||
{VK_FORMAT_R64G64B64A64_UINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
||||
{VK_FORMAT_R64G64B64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
||||
{VK_FORMAT_R64G64B64_SINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
||||
{VK_FORMAT_R64G64B64_UINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
||||
{VK_FORMAT_R64G64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R64G64_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R64G64_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
||||
{VK_FORMAT_R64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R64_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R64_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
||||
{VK_FORMAT_R8G8B8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
||||
{VK_FORMAT_R8G8B8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8B8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
||||
{VK_FORMAT_R8G8_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_SRGB, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8G8_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
||||
{VK_FORMAT_R8_SINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_UINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
||||
{VK_FORMAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::S8},
|
||||
{VK_FORMAT_X8_D24_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::D24},
|
||||
{VK_FORMAT_UNDEFINED, FORMAT_COMPATIBILITY_CLASS::NONE},
|
||||
};
|
||||
/// Returns true if the two formats are compatible according to Vulkan's format compatibility rules
bool IsVulkanFormatCompatible(vk::Format base, vk::Format view);

/**
 * @return If the two formats are compatible according to Vulkan's format compatibility rules
 * @url
 * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
 */
static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
    if (lhs == rhs) {
        return true;
    }
    return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
}

} // namespace VideoCore

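For illustration, a minimal sketch of what the table-driven check accepts and rejects (the pairs are illustrative, chosen from entries in the table above):

// Both formats map to FORMAT_COMPATIBILITY_CLASS::_32BIT, so views may alias:
const bool compatible =
    IsVulkanFormatCompatible(vk::Format::eR8G8B8A8Unorm, vk::Format::eR32Sfloat); // true
// BC4 and BC7 are distinct classes, so this pair is rejected:
const bool rejected =
    IsVulkanFormatCompatible(vk::Format::eBc4UnormBlock, vk::Format::eBc7UnormBlock); // false
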
@ -14,62 +14,6 @@ namespace VideoCore {

using namespace Vulkan;

bool ImageInfo::IsBlockCoded() const {
    switch (pixel_format) {
    case vk::Format::eBc1RgbaSrgbBlock:
    case vk::Format::eBc1RgbaUnormBlock:
    case vk::Format::eBc1RgbSrgbBlock:
    case vk::Format::eBc1RgbUnormBlock:
    case vk::Format::eBc2SrgbBlock:
    case vk::Format::eBc2UnormBlock:
    case vk::Format::eBc3SrgbBlock:
    case vk::Format::eBc3UnormBlock:
    case vk::Format::eBc4SnormBlock:
    case vk::Format::eBc4UnormBlock:
    case vk::Format::eBc5SnormBlock:
    case vk::Format::eBc5UnormBlock:
    case vk::Format::eBc6HSfloatBlock:
    case vk::Format::eBc6HUfloatBlock:
    case vk::Format::eBc7SrgbBlock:
    case vk::Format::eBc7UnormBlock:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::IsPacked() const {
    switch (pixel_format) {
    case vk::Format::eB5G5R5A1UnormPack16:
        [[fallthrough]];
    case vk::Format::eB5G6R5UnormPack16:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::IsDepthStencil() const {
    switch (pixel_format) {
    case vk::Format::eD16Unorm:
    case vk::Format::eD16UnormS8Uint:
    case vk::Format::eD32Sfloat:
    case vk::Format::eD32SfloatS8Uint:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::HasStencil() const {
    if (pixel_format == vk::Format::eD32SfloatS8Uint ||
        pixel_format == vk::Format::eD24UnormS8Uint ||
        pixel_format == vk::Format::eD16UnormS8Uint) {
        return true;
    }
    return false;
}

static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
    vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
                                vk::ImageUsageFlagBits::eTransferDst |

@ -161,6 +105,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
    if (info.props.is_volume) {
        flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
    }
    // Not supported by MoltenVK.
    if (info.props.is_block && instance->GetDriverID() != vk::DriverId::eMoltenvk) {
        flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
    }

    usage_flags = ImageUsageFlags(info);
    format_features = FormatFeatureFlags(usage_flags);

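For context on the eBlockTexelViewCompatible path above, a hedged sketch of the kind of view the flag enables (the bc7_image handle is an assumption; per the Vulkan spec the flag must be combined with eMutableFormat at image creation):

// One view texel per 4x4 compressed block: BC7's 128-bit block is viewed
// through the size-matched uncompressed format.
const vk::ImageViewCreateInfo view_ci{
    .image = bc7_image, // assumed: created with eBlockTexelViewCompatible | eMutableFormat
    .viewType = vk::ImageViewType::e2D,
    .format = vk::Format::eR32G32B32A32Uint, // same 128-bit size as one BC7 block
    .subresourceRange{
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .levelCount = 1,
        .layerCount = 1,
    },
};
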
@ -372,9 +320,9 @@ void Image::CopyImage(const Image& image) {

    boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
    for (u32 m = 0; m < image.info.resources.levels; ++m) {
        const auto mip_w = std::max(info.size.width >> m, 1u);
        const auto mip_h = std::max(info.size.height >> m, 1u);
        const auto mip_d = std::max(info.size.depth >> m, 1u);
        const auto mip_w = std::max(image.info.size.width >> m, 1u);
        const auto mip_h = std::max(image.info.size.height >> m, 1u);
        const auto mip_d = std::max(image.info.size.depth >> m, 1u);

        image_copy.emplace_back(vk::ImageCopy{
            .srcSubresource{

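The corrected lines read the mip chain of the copy source (image.info) instead of the destination, since the loop walks the source's levels. The per-level clamp is equivalent to this small helper:

// Halve the base extent per level, never dropping below 1.
u32 MipExtent(u32 base, u32 level) {
    return std::max(base >> level, 1u);
}
// e.g. a 100-texel-wide source: level 0 -> 100, level 1 -> 50, level 2 -> 25.
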
@ -81,7 +81,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
    tiling_mode = buffer.GetTilingMode();
    pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
    num_samples = buffer.NumSamples();
    num_bits = NumBits(buffer.GetDataFmt());
    num_bits = NumBitsPerBlock(buffer.GetDataFmt());
    type = vk::ImageType::e2D;
    size.width = hint.Valid() ? hint.width : buffer.Pitch();
    size.height = hint.Valid() ? hint.height : buffer.Height();

@ -142,7 +142,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
    resources.levels = image.NumLevels();
    resources.layers = image.NumLayers();
    num_samples = image.NumSamples();
    num_bits = NumBits(image.GetDataFmt());
    num_bits = NumBitsPerBlock(image.GetDataFmt());

    guest_address = image.Address();

@ -152,6 +152,80 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
    UpdateSize();
}

bool ImageInfo::IsBlockCoded() const {
    switch (pixel_format) {
    case vk::Format::eBc1RgbaSrgbBlock:
    case vk::Format::eBc1RgbaUnormBlock:
    case vk::Format::eBc1RgbSrgbBlock:
    case vk::Format::eBc1RgbUnormBlock:
    case vk::Format::eBc2SrgbBlock:
    case vk::Format::eBc2UnormBlock:
    case vk::Format::eBc3SrgbBlock:
    case vk::Format::eBc3UnormBlock:
    case vk::Format::eBc4SnormBlock:
    case vk::Format::eBc4UnormBlock:
    case vk::Format::eBc5SnormBlock:
    case vk::Format::eBc5UnormBlock:
    case vk::Format::eBc6HSfloatBlock:
    case vk::Format::eBc6HUfloatBlock:
    case vk::Format::eBc7SrgbBlock:
    case vk::Format::eBc7UnormBlock:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::IsPacked() const {
    switch (pixel_format) {
    case vk::Format::eB5G5R5A1UnormPack16:
        [[fallthrough]];
    case vk::Format::eB5G6R5UnormPack16:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::IsDepthStencil() const {
    switch (pixel_format) {
    case vk::Format::eD16Unorm:
    case vk::Format::eD16UnormS8Uint:
    case vk::Format::eD32Sfloat:
    case vk::Format::eD32SfloatS8Uint:
        return true;
    default:
        return false;
    }
}

bool ImageInfo::HasStencil() const {
    if (pixel_format == vk::Format::eD32SfloatS8Uint ||
        pixel_format == vk::Format::eD24UnormS8Uint ||
        pixel_format == vk::Format::eD16UnormS8Uint) {
        return true;
    }
    return false;
}

bool ImageInfo::IsCompatible(const ImageInfo& info) const {
    return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
            num_bits == info.num_bits);
}

bool ImageInfo::IsTilingCompatible(u32 lhs, u32 rhs) const {
    if (lhs == rhs) {
        return true;
    }
    if (lhs == 0x0e && rhs == 0x0d) {
        return true;
    }
    if (lhs == 0x0d && rhs == 0x0e) {
        return true;
    }
    return false;
}

void ImageInfo::UpdateSize() {
    mips_layout.clear();
    MipInfo mip_info{};

@ -163,7 +237,6 @@ void ImageInfo::UpdateSize() {
        if (props.is_block) {
            mip_w = (mip_w + 3) / 4;
            mip_h = (mip_h + 3) / 4;
            bpp *= 16;
        }
        mip_w = std::max(mip_w, 1u);
        mip_h = std::max(mip_h, 1u);

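Since num_bits now comes from NumBitsPerBlock, it already counts a whole 4x4 block, which is why the extra bpp scaling disappears from this hunk. A worked instance of the per-mip math:

// BC1 carries 64 bits per 4x4 block; for a 64x64 mip level:
constexpr u32 BlockAlignedMipBytes(u32 w, u32 h, u32 bits_per_block) {
    const u32 bw = (w + 3) / 4; // blocks per row
    const u32 bh = (h + 3) / 4; // block rows
    return bw * bh * bits_per_block / 8;
}
static_assert(BlockAlignedMipBytes(64, 64, 64) == 2048); // 16 x 16 blocks x 8 bytes
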
@ -25,6 +25,11 @@ struct ImageInfo {
    bool IsTiled() const {
        return tiling_mode != AmdGpu::TilingMode::Display_Linear;
    }
    Extent3D BlockDim() const {
        const u32 shift = props.is_block ? 2 : 0;
        return Extent3D{size.width >> shift, size.height >> shift, size.depth};
    }

    bool IsBlockCoded() const;
    bool IsPacked() const;
    bool IsDepthStencil() const;

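A short usage sketch of the new BlockDim() helper (the initialization below assumes ImageInfo and Extent3D are aggregate-initializable, as the fields visible in this diff suggest):

ImageInfo info{};
info.size = {256, 128, 1};
info.props.is_block = true; // e.g. a BC-compressed texture
const Extent3D dims = info.BlockDim(); // {64, 32, 1}: extents in 4x4 blocks
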
@ -33,24 +38,8 @@ struct ImageInfo {
    s32 MipOf(const ImageInfo& info) const;
    s32 SliceOf(const ImageInfo& info, s32 mip) const;

    /// Verifies if images are compatible for subresource merging.
    bool IsCompatible(const ImageInfo& info) const {
        return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
                num_bits == info.num_bits);
    }

    bool IsTilingCompatible(u32 lhs, u32 rhs) const {
        if (lhs == rhs) {
            return true;
        }
        if (lhs == 0x0e && rhs == 0x0d) {
            return true;
        }
        if (lhs == 0x0d && rhs == 0x0e) {
            return true;
        }
        return false;
    }
    bool IsCompatible(const ImageInfo& info) const;
    bool IsTilingCompatible(u32 lhs, u32 rhs) const;

    void UpdateSize();

@ -199,7 +199,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
        scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;

    if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
        if (image_info.size != tex_cache_image.info.size) {
        if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
            image_info.num_bits != tex_cache_image.info.num_bits) {
            // Very likely this kind of overlap is caused by allocation from a pool.
            if (safe_to_delete) {
                FreeImage(cache_image_id);

@ -211,15 +212,19 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
            return {depth_image_id, -1, -1};
        }

        if (image_info.IsBlockCoded() && !tex_cache_image.info.IsBlockCoded()) {
            // Compressed view of uncompressed image with same block size.
            // We need to recreate the image with compressed format and copy.
            return {ExpandImage(image_info, cache_image_id), -1, -1};
        }

        if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
            image_info.guest_size <= tex_cache_image.info.guest_size) {
            auto result_id = merged_image_id ? merged_image_id : cache_image_id;
            const auto& result_image = slot_images[result_id];
            return {
                IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
                    ? result_id
                    : ImageId{},
                -1, -1};
            const bool is_compatible =
                IsVulkanFormatCompatible(result_image.info.pixel_format, image_info.pixel_format);
            return {is_compatible ? result_id : ImageId{}, -1, -1};
        }

        if (image_info.type == tex_cache_image.info.type &&

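The rewritten return path is behaviorally identical to the expression it replaces; swapping the argument order is harmless because the class-table comparison is symmetric. A sanity sketch:

const vk::Format a = vk::Format::eR8G8B8A8Unorm;
const vk::Format b = vk::Format::eR32Uint;
assert(IsVulkanFormatCompatible(a, b) == IsVulkanFormatCompatible(b, a)); // both orders agree
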
@ -299,6 +304,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
    auto& new_image = slot_images[new_image_id];

    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
    RefreshImage(new_image);
    new_image.CopyImage(src_image);

    if (src_image.binding.is_bound || src_image.binding.is_target) {

@ -339,7 +345,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
            continue;
        }
        if (False(flags & FindFlags::RelaxFmt) &&
            (!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format) ||
            (!IsVulkanFormatCompatible(cache_image.info.pixel_format, info.pixel_format) ||
             (cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) {
            continue;
        }

@ -511,9 +517,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
        // So this calculation should be very uncommon and reasonably fast
        // For now we'll just check up to the first 64 pixels
        const auto addr = std::bit_cast<u8*>(image.info.guest_address);
        const auto w = std::min(image.info.size.width, u32(8));
        const auto h = std::min(image.info.size.height, u32(8));
        const auto size = w * h * image.info.num_bits / 8;
        const u32 w = std::min(image.info.size.width, u32(8));
        const u32 h = std::min(image.info.size.height, u32(8));
        const u32 size = w * h * image.info.num_bits >> (3 + (image.info.props.is_block ? 4 : 0));
        const u64 hash = XXH3_64bits(addr, size);
        if (image.hash == hash) {
            image.flags &= ~ImageFlagBits::MaybeCpuDirty;

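A worked example of the sampled-hash size: the shift is 3 (bits to bytes) plus 4 more for block formats, since num_bits then counts a whole 4x4 block; note the inner parentheses are required so the conditional adds to the shift amount rather than replacing it.

// Sampled region is at most 8x8 texels:
//   RGBA8 (num_bits = 32):            8 * 8 * 32  >> 3 = 256 bytes hashed
//   BC7   (num_bits = 128 per block): 8 * 8 * 128 >> 7 =  64 bytes hashed
// The extra 4 in the shift accounts for 16 texels sharing one block.
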
@ -25,10 +25,9 @@
namespace VideoCore {

const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
    const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
    switch (info.tiling_mode) {
    case AmdGpu::TilingMode::Texture_MicroTiled:
        switch (bpp) {
        switch (info.num_bits) {
        case 8:
            return &detilers[DetilerType::Micro8];
        case 16:

@ -43,7 +42,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
            return nullptr;
        }
    case AmdGpu::TilingMode::Texture_Volume:
        switch (bpp) {
        switch (info.num_bits) {
        case 8:
            return &detilers[DetilerType::Macro8];
        case 32:

@ -55,7 +54,7 @@
        }
        break;
    case AmdGpu::TilingMode::Display_MicroTiled:
        switch (bpp) {
        switch (info.num_bits) {
        case 64:
            return &detilers[DetilerType::Display_Micro64];
        default:

@ -71,7 +70,7 @@ struct DetilerParams {
    u32 num_levels;
    u32 pitch0;
    u32 height;
    u32 sizes[14];
    std::array<u32, 16> sizes;
};

TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)

@ -270,13 +269,16 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
    params.height = info.size.height;
    if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume ||
        info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) {
        ASSERT(info.resources.levels == 1);
        if (info.resources.levels != 1) {
            LOG_ERROR(Render_Vulkan, "Unexpected mipmaps for volume and display tilings {}",
                      info.resources.levels);
        }
        const auto tiles_per_row = info.pitch / 8u;
        const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
        params.sizes[0] = tiles_per_row;
        params.sizes[1] = tiles_per_slice;
    } else {
        ASSERT(info.resources.levels <= 14);
        ASSERT(info.resources.levels <= params.sizes.size());
        std::memset(&params.sizes, 0, sizeof(params.sizes));
        for (int m = 0; m < info.resources.levels; ++m) {
            params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);

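A worked instance of the micro-tile math above, using an illustrative 1920x1080 surface (micro-tiles are 8x8 texels):

// pitch = 1920:
//   tiles_per_row   = 1920 / 8 = 240
//   tiles_per_slice = 240 * ((1080 + 7) / 8) = 240 * 135 = 32400
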
@ -287,8 +289,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
                      &params);

    ASSERT((image_size % 64) == 0);
    const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
    const auto num_tiles = image_size / (64 * (bpp / 8));
    const auto num_tiles = image_size / (64 * (info.num_bits / 8));
    cmdbuf.dispatch(num_tiles, 1, 1);
    return {out_buffer.first, 0};
}

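For reference, a worked example of the dispatch size in this hunk (illustrative numbers; one workgroup covers one 64-texel micro-tile):

// 512x512 RGBA8 image (num_bits = 32):
//   image_size = 512 * 512 * 4      = 1048576 bytes
//   num_tiles  = 1048576 / (64 * 4) = 4096 workgroups dispatched
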