mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 18:45:36 +00:00
Merge branch 'main' into m4aac
This commit is contained in:
commit
dd7c1cbd86
36
.github/workflows/build.yml
vendored
36
.github/workflows/build.yml
vendored
@ -76,18 +76,13 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
|
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
append-timestamp: false
|
append-timestamp: false
|
||||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||||
|
|
||||||
- name: Setup VS Environment
|
|
||||||
uses: ilammy/msvc-dev-cmd@v1.13.0
|
|
||||||
with:
|
|
||||||
arch: amd64
|
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
@ -111,7 +106,7 @@ jobs:
|
|||||||
- name: Setup Qt
|
- name: Setup Qt
|
||||||
uses: jurplel/install-qt-action@v4
|
uses: jurplel/install-qt-action@v4
|
||||||
with:
|
with:
|
||||||
version: 6.9.0
|
version: 6.9.1
|
||||||
host: windows
|
host: windows
|
||||||
target: desktop
|
target: desktop
|
||||||
arch: win64_msvc2022_64
|
arch: win64_msvc2022_64
|
||||||
@ -130,18 +125,13 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-qt-cache-cmake-build
|
cache-name: ${{ runner.os }}-qt-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
append-timestamp: false
|
append-timestamp: false
|
||||||
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
|
||||||
|
|
||||||
- name: Setup VS Environment
|
|
||||||
uses: ilammy/msvc-dev-cmd@v1.13.0
|
|
||||||
with:
|
|
||||||
arch: amd64
|
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: Configure CMake
|
||||||
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
|
||||||
@ -186,7 +176,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{runner.os}}-sdl-cache-cmake-build
|
cache-name: ${{runner.os}}-sdl-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -228,7 +218,7 @@ jobs:
|
|||||||
- name: Setup Qt
|
- name: Setup Qt
|
||||||
uses: jurplel/install-qt-action@v4
|
uses: jurplel/install-qt-action@v4
|
||||||
with:
|
with:
|
||||||
version: 6.9.0
|
version: 6.9.1
|
||||||
host: mac
|
host: mac
|
||||||
target: desktop
|
target: desktop
|
||||||
arch: clang_64
|
arch: clang_64
|
||||||
@ -247,7 +237,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{runner.os}}-qt-cache-cmake-build
|
cache-name: ${{runner.os}}-qt-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -301,7 +291,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
|
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -362,7 +352,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-qt-cache-cmake-build
|
cache-name: ${{ runner.os }}-qt-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -409,7 +399,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
|
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -445,7 +435,7 @@ jobs:
|
|||||||
${{ env.cache-name }}-
|
${{ env.cache-name }}-
|
||||||
|
|
||||||
- name: Cache CMake Build
|
- name: Cache CMake Build
|
||||||
uses: hendrikmuhs/ccache-action@v1.2.17
|
uses: hendrikmuhs/ccache-action@v1.2.18
|
||||||
env:
|
env:
|
||||||
cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
|
cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
|
||||||
with:
|
with:
|
||||||
@ -494,7 +484,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
token: ${{ secrets.SHADPS4_TOKEN_REPO }}
|
token: ${{ secrets.SHADPS4_TOKEN_REPO }}
|
||||||
name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
|
name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
|
||||||
tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
|
tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}"
|
||||||
draft: false
|
draft: false
|
||||||
prerelease: true
|
prerelease: true
|
||||||
body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"
|
body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"
|
||||||
@ -530,14 +520,14 @@ jobs:
|
|||||||
|
|
||||||
# Check if release already exists and get ID
|
# Check if release already exists and get ID
|
||||||
release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
|
release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
|
||||||
"https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" | jq -r '.id')
|
"https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}" | jq -r '.id')
|
||||||
|
|
||||||
if [[ "$release_id" == "null" ]]; then
|
if [[ "$release_id" == "null" ]]; then
|
||||||
echo "Creating release in $REPO for $filename"
|
echo "Creating release in $REPO for $filename"
|
||||||
release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
|
release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
|
||||||
-H "Accept: application/vnd.github.v3+json" \
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
-d '{
|
-d '{
|
||||||
"tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
|
"tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}",
|
||||||
"name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
|
"name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
|
||||||
"draft": false,
|
"draft": false,
|
||||||
"prerelease": true,
|
"prerelease": true,
|
||||||
|
@ -966,6 +966,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||||||
src/video_core/texture_cache/tile_manager.cpp
|
src/video_core/texture_cache/tile_manager.cpp
|
||||||
src/video_core/texture_cache/tile_manager.h
|
src/video_core/texture_cache/tile_manager.h
|
||||||
src/video_core/texture_cache/types.h
|
src/video_core/texture_cache/types.h
|
||||||
|
src/video_core/texture_cache/host_compatibility.cpp
|
||||||
src/video_core/texture_cache/host_compatibility.h
|
src/video_core/texture_cache/host_compatibility.h
|
||||||
src/video_core/page_manager.cpp
|
src/video_core/page_manager.cpp
|
||||||
src/video_core/page_manager.h
|
src/video_core/page_manager.h
|
||||||
|
@ -88,7 +88,8 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
|
|||||||
dst_op.reg.value <= ZYDIS_REGISTER_R15;
|
dst_op.reg.value <= ZYDIS_REGISTER_R15;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* operands,
|
||||||
|
Xbyak::CodeGenerator& c) {
|
||||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
@ -126,7 +127,8 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
|
|||||||
return !cpu.has(Cpu::tSSE4a);
|
return !cpu.has(Cpu::tSSE4a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operands,
|
||||||
|
Xbyak::CodeGenerator& c) {
|
||||||
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
||||||
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
||||||
|
|
||||||
@ -245,7 +247,8 @@ static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* operands,
|
||||||
|
Xbyak::CodeGenerator& c) {
|
||||||
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
||||||
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
||||||
|
|
||||||
@ -383,8 +386,44 @@ static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGene
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ReplaceMOVNT(void* address, u8 rep_prefix) {
|
||||||
|
// Find the opcode byte
|
||||||
|
// There can be any amount of prefixes but the instruction can't be more than 15 bytes
|
||||||
|
// And we know for sure this is a MOVNTSS/MOVNTSD
|
||||||
|
bool found = false;
|
||||||
|
bool rep_prefix_found = false;
|
||||||
|
int index = 0;
|
||||||
|
u8* ptr = reinterpret_cast<u8*>(address);
|
||||||
|
for (int i = 0; i < 15; i++) {
|
||||||
|
if (ptr[i] == rep_prefix) {
|
||||||
|
rep_prefix_found = true;
|
||||||
|
} else if (ptr[i] == 0x2B) {
|
||||||
|
index = i;
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some sanity checks
|
||||||
|
ASSERT(found);
|
||||||
|
ASSERT(index >= 2);
|
||||||
|
ASSERT(ptr[index - 1] == 0x0F);
|
||||||
|
ASSERT(rep_prefix_found);
|
||||||
|
|
||||||
|
// This turns the MOVNTSS/MOVNTSD to a MOVSS/MOVSD m, xmm
|
||||||
|
ptr[index] = 0x11;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ReplaceMOVNTSS(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
|
||||||
|
ReplaceMOVNT(address, 0xF3);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ReplaceMOVNTSD(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
|
||||||
|
ReplaceMOVNT(address, 0xF2);
|
||||||
|
}
|
||||||
|
|
||||||
using PatchFilter = bool (*)(const ZydisDecodedOperand*);
|
using PatchFilter = bool (*)(const ZydisDecodedOperand*);
|
||||||
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
|
using InstructionGenerator = void (*)(void*, const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
|
||||||
struct PatchInfo {
|
struct PatchInfo {
|
||||||
/// Filter for more granular patch conditions past just the instruction mnemonic.
|
/// Filter for more granular patch conditions past just the instruction mnemonic.
|
||||||
PatchFilter filter;
|
PatchFilter filter;
|
||||||
@ -400,6 +439,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
|
|||||||
// SSE4a
|
// SSE4a
|
||||||
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
|
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
|
||||||
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
|
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
|
||||||
|
{ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}},
|
||||||
|
{ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}},
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
// Windows needs a trampoline.
|
// Windows needs a trampoline.
|
||||||
@ -477,7 +518,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
|
|||||||
auto& trampoline_gen = module->trampoline_gen;
|
auto& trampoline_gen = module->trampoline_gen;
|
||||||
const auto trampoline_ptr = trampoline_gen.getCurr();
|
const auto trampoline_ptr = trampoline_gen.getCurr();
|
||||||
|
|
||||||
patch_info.generator(operands, trampoline_gen);
|
patch_info.generator(code, operands, trampoline_gen);
|
||||||
|
|
||||||
// Return to the following instruction at the end of the trampoline.
|
// Return to the following instruction at the end of the trampoline.
|
||||||
trampoline_gen.jmp(code + instruction.length);
|
trampoline_gen.jmp(code + instruction.length);
|
||||||
@ -485,7 +526,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
|
|||||||
// Replace instruction with near jump to the trampoline.
|
// Replace instruction with near jump to the trampoline.
|
||||||
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
|
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
|
||||||
} else {
|
} else {
|
||||||
patch_info.generator(operands, patch_gen);
|
patch_info.generator(code, operands, patch_gen);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto patch_size = patch_gen.getCurr() - code;
|
const auto patch_size = patch_gen.getCurr() - code;
|
||||||
|
@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
|
|||||||
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
|
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
|
||||||
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
|
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
|
||||||
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
|
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
|
||||||
|
LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
|
||||||
|
LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
|
||||||
|
LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
|
||||||
|
LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Libraries::Kernel
|
} // namespace Libraries::Kernel
|
||||||
|
@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
|
||||||
int flags, const char* name) {
|
const char* name) {
|
||||||
|
LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
|
||||||
|
fmt::ptr(*addr_in_out), len, prot, flags, name);
|
||||||
if (len == 0 || !Common::Is16KBAligned(len)) {
|
if (len == 0 || !Common::Is16KBAligned(len)) {
|
||||||
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
|
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
|
||||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||||
@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
|
|||||||
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
|
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
|
||||||
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
|
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
|
||||||
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
|
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
|
||||||
SCOPE_EXIT {
|
|
||||||
LOG_INFO(Kernel_Vmm,
|
|
||||||
"in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
|
|
||||||
in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
|
|
||||||
};
|
|
||||||
auto* memory = Core::Memory::Instance();
|
auto* memory = Core::Memory::Instance();
|
||||||
return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
|
const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
|
||||||
Core::VMAType::Flexible, name);
|
Core::VMAType::Flexible, name);
|
||||||
|
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
|
||||||
int flags) {
|
|
||||||
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
|
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -663,6 +660,9 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
|
|||||||
"PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
|
"PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
|
||||||
address, size);
|
address, size);
|
||||||
|
|
||||||
|
auto* memory = Core::Memory::Instance();
|
||||||
|
memory->SetPrtArea(id, address, size);
|
||||||
|
|
||||||
PrtApertures[id] = {address, size};
|
PrtApertures[id] = {address, size};
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
|
|||||||
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
|
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
|
||||||
size_t infoSize);
|
size_t infoSize);
|
||||||
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
|
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
|
||||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
|
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
|
||||||
int flags, const char* name);
|
const char* name);
|
||||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
|
||||||
int flags);
|
|
||||||
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
|
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
|
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
|
||||||
|
@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
|
|||||||
// Posix
|
// Posix
|
||||||
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
|
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
|
||||||
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
||||||
|
LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
|
||||||
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
||||||
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
||||||
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
|
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
|
||||||
|
@ -282,7 +282,12 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
|
|||||||
|
|
||||||
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
|
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
|
||||||
LOG_INFO(Lib_VideoOut, "called");
|
LOG_INFO(Lib_VideoOut, "called");
|
||||||
*status = driver->GetPort(handle)->resolution;
|
auto* port = driver->GetPort(handle);
|
||||||
|
if (!port || !port->is_open) {
|
||||||
|
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
*status = port->resolution;
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ void ZlibTaskThread(const std::stop_token& stop) {
|
|||||||
if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
|
if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
task = task_queue.back();
|
task = task_queue.front();
|
||||||
task_queue.pop();
|
task_queue.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +136,7 @@ s32 PS4_SYSV_ABI sceZlibWaitForDone(u64* request_id, const u32* timeout) {
|
|||||||
} else {
|
} else {
|
||||||
done_queue_cv.wait(lock, pred);
|
done_queue_cv.wait(lock, pred);
|
||||||
}
|
}
|
||||||
*request_id = done_queue.back();
|
*request_id = done_queue.front();
|
||||||
done_queue.pop();
|
done_queue.pop();
|
||||||
}
|
}
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
|
@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
|
|||||||
sr.type = sym_type;
|
sr.type = sym_type;
|
||||||
|
|
||||||
const auto* record = m_hle_symbols.FindSymbol(sr);
|
const auto* record = m_hle_symbols.FindSymbol(sr);
|
||||||
if (!record) {
|
|
||||||
// Check if it an export function
|
|
||||||
const auto* p = FindExportedModule(*module, *library);
|
|
||||||
if (p && p->export_sym.GetSize() > 0) {
|
|
||||||
record = p->export_sym.FindSymbol(sr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (record) {
|
if (record) {
|
||||||
*return_info = *record;
|
*return_info = *record;
|
||||||
|
|
||||||
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
|
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if it an export function
|
||||||
|
const auto* p = FindExportedModule(*module, *library);
|
||||||
|
if (p && p->export_sym.GetSize() > 0) {
|
||||||
|
record = p->export_sym.FindSymbol(sr);
|
||||||
|
if (record) {
|
||||||
|
*return_info = *record;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
|
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
|
||||||
if (aeronid) {
|
if (aeronid) {
|
||||||
return_info->name = aeronid->name;
|
return_info->name = aeronid->name;
|
||||||
|
@ -95,6 +95,46 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
|
|||||||
return clamped_size;
|
return clamped_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
|
||||||
|
PrtArea& area = prt_areas[id];
|
||||||
|
if (area.mapped) {
|
||||||
|
rasterizer->UnmapMemory(area.start, area.end - area.start);
|
||||||
|
}
|
||||||
|
|
||||||
|
area.start = address;
|
||||||
|
area.end = address + size;
|
||||||
|
area.mapped = true;
|
||||||
|
|
||||||
|
// Pretend the entire PRT area is mapped to avoid GPU tracking errors.
|
||||||
|
// The caches will use CopySparseMemory to fetch data which avoids unmapped areas.
|
||||||
|
rasterizer->MapMemory(address, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
|
||||||
|
const bool is_sparse = std::ranges::any_of(
|
||||||
|
prt_areas, [&](const PrtArea& area) { return area.Overlaps(virtual_addr, size); });
|
||||||
|
if (!is_sparse) {
|
||||||
|
std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto vma = FindVMA(virtual_addr);
|
||||||
|
ASSERT_MSG(vma->second.Contains(virtual_addr, 0),
|
||||||
|
"Attempted to access invalid GPU address {:#x}", virtual_addr);
|
||||||
|
while (size) {
|
||||||
|
u64 copy_size = std::min<u64>(vma->second.size - (virtual_addr - vma->first), size);
|
||||||
|
if (vma->second.IsFree()) {
|
||||||
|
std::memset(dest, 0, copy_size);
|
||||||
|
} else {
|
||||||
|
std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), copy_size);
|
||||||
|
}
|
||||||
|
size -= copy_size;
|
||||||
|
virtual_addr += copy_size;
|
||||||
|
dest += copy_size;
|
||||||
|
++vma;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
|
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
|
||||||
const VAddr virtual_addr = std::bit_cast<VAddr>(address);
|
const VAddr virtual_addr = std::bit_cast<VAddr>(address);
|
||||||
const auto& vma = FindVMA(virtual_addr)->second;
|
const auto& vma = FindVMA(virtual_addr)->second;
|
||||||
@ -182,7 +222,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
|
|||||||
auto& area = CarveDmemArea(mapping_start, size)->second;
|
auto& area = CarveDmemArea(mapping_start, size)->second;
|
||||||
area.memory_type = memory_type;
|
area.memory_type = memory_type;
|
||||||
area.is_free = false;
|
area.is_free = false;
|
||||||
MergeAdjacent(dmem_map, dmem_area);
|
|
||||||
return mapping_start;
|
return mapping_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,6 +172,10 @@ public:
|
|||||||
|
|
||||||
u64 ClampRangeSize(VAddr virtual_addr, u64 size);
|
u64 ClampRangeSize(VAddr virtual_addr, u64 size);
|
||||||
|
|
||||||
|
void SetPrtArea(u32 id, VAddr address, u64 size);
|
||||||
|
|
||||||
|
void CopySparseMemory(VAddr source, u8* dest, u64 size);
|
||||||
|
|
||||||
bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
|
bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
|
||||||
|
|
||||||
void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
|
void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
|
||||||
@ -275,6 +279,18 @@ private:
|
|||||||
size_t pool_budget{};
|
size_t pool_budget{};
|
||||||
Vulkan::Rasterizer* rasterizer{};
|
Vulkan::Rasterizer* rasterizer{};
|
||||||
|
|
||||||
|
struct PrtArea {
|
||||||
|
VAddr start;
|
||||||
|
VAddr end;
|
||||||
|
bool mapped;
|
||||||
|
|
||||||
|
bool Overlaps(VAddr test_address, u64 test_size) const {
|
||||||
|
const VAddr overlap_end = test_address + test_size;
|
||||||
|
return start < overlap_end && test_address < end;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
std::array<PrtArea, 3> prt_areas{};
|
||||||
|
|
||||||
friend class ::Core::Devtools::Widget::MemoryMapViewer;
|
friend class ::Core::Devtools::Widget::MemoryMapViewer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
latestRev = latestVersion.right(7);
|
latestRev = latestVersion.right(40);
|
||||||
latestDate = jsonObj["published_at"].toString();
|
latestDate = jsonObj["published_at"].toString();
|
||||||
|
|
||||||
QJsonArray assets = jsonObj["assets"].toArray();
|
QJsonArray assets = jsonObj["assets"].toArray();
|
||||||
@ -167,7 +167,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
|
|||||||
QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
|
QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
|
||||||
latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";
|
latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";
|
||||||
|
|
||||||
if (latestRev == currentRev.left(7)) {
|
if (latestRev == currentRev) {
|
||||||
if (showMessage) {
|
if (showMessage) {
|
||||||
QMessageBox::information(this, tr("Auto Updater"),
|
QMessageBox::information(this, tr("Auto Updater"),
|
||||||
tr("Your version is already up to date!"));
|
tr("Your version is already up to date!"));
|
||||||
@ -215,7 +215,7 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate,
|
|||||||
"<td>%3</td>"
|
"<td>%3</td>"
|
||||||
"<td>(%4)</td>"
|
"<td>(%4)</td>"
|
||||||
"</tr></table></p>")
|
"</tr></table></p>")
|
||||||
.arg(currentRev.left(7), currentDate, latestRev, latestDate);
|
.arg(currentRev.left(7), currentDate, latestRev.left(7), latestDate);
|
||||||
|
|
||||||
QLabel* updateLabel = new QLabel(updateText, this);
|
QLabel* updateLabel = new QLabel(updateText, this);
|
||||||
layout->addWidget(updateLabel);
|
layout->addWidget(updateLabel);
|
||||||
|
@ -2048,7 +2048,7 @@
|
|||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source> * Unsupported Vulkan Version</source>
|
<source> * Unsupported Vulkan Version</source>
|
||||||
<translation type="unfinished"> * Unsupported Vulkan Version</translation>
|
<translation> * Versão do Vulkan não suportada</translation>
|
||||||
</message>
|
</message>
|
||||||
</context>
|
</context>
|
||||||
<context>
|
<context>
|
||||||
|
@ -138,7 +138,7 @@
|
|||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>File Exists</source>
|
<source>File Exists</source>
|
||||||
<translation>Dosya mevcut</translation>
|
<translation>Dosya Mevcut</translation>
|
||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>File already exists. Do you want to replace it?</source>
|
<source>File already exists. Do you want to replace it?</source>
|
||||||
@ -1221,7 +1221,7 @@
|
|||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>Exit shadPS4</source>
|
<source>Exit shadPS4</source>
|
||||||
<translation>shadPS4'ten Çık</translation>
|
<translation>shadPS4 Çıkış</translation>
|
||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>Exit the application.</source>
|
<source>Exit the application.</source>
|
||||||
@ -1381,7 +1381,7 @@
|
|||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>Game Boot</source>
|
<source>Game Boot</source>
|
||||||
<translation>Oyun Başlatma</translation>
|
<translation>Oyun Başlat</translation>
|
||||||
</message>
|
</message>
|
||||||
<message>
|
<message>
|
||||||
<source>Only one file can be selected!</source>
|
<source>Only one file can be selected!</source>
|
||||||
|
@ -303,6 +303,11 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||||||
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
|
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
|
||||||
ctx.AddExtension("SPV_KHR_physical_storage_buffer");
|
ctx.AddExtension("SPV_KHR_physical_storage_buffer");
|
||||||
}
|
}
|
||||||
|
if (info.uses_shared && profile.supports_workgroup_explicit_memory_layout) {
|
||||||
|
ctx.AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
||||||
|
ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
||||||
|
ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/div_ceil.h"
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
|
||||||
@ -15,42 +17,40 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
|
|||||||
Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
|
Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
|
||||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(2U)};
|
||||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
|
const Id pointer{
|
||||||
|
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
|
||||||
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||||
|
const Id shift_id{ctx.ConstU32(3U)};
|
||||||
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
|
const Id pointer{
|
||||||
|
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
||||||
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
|
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
|
Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
|
||||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(2U)};
|
||||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
|
const Id pointer{
|
||||||
|
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
}
|
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
|
||||||
|
});
|
||||||
Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
|
|
||||||
if (Sirit::ValidId(buffer_size)) {
|
|
||||||
// Bounds checking enabled, wrap in a conditional branch to make sure that
|
|
||||||
// the atomic is not mistakenly executed when the index is out of bounds.
|
|
||||||
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
|
|
||||||
const Id ib_label = ctx.OpLabel();
|
|
||||||
const Id oob_label = ctx.OpLabel();
|
|
||||||
const Id end_label = ctx.OpLabel();
|
|
||||||
ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
|
|
||||||
ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
|
|
||||||
ctx.AddLabel(ib_label);
|
|
||||||
const Id ib_result = emit_func();
|
|
||||||
ctx.OpBranch(end_label);
|
|
||||||
ctx.AddLabel(oob_label);
|
|
||||||
const Id oob_result = ctx.u32_zero_value;
|
|
||||||
ctx.OpBranch(end_label);
|
|
||||||
ctx.AddLabel(end_label);
|
|
||||||
return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
|
|
||||||
}
|
|
||||||
// Bounds checking not enabled, just perform the atomic operation.
|
|
||||||
return emit_func();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
@ -63,11 +63,42 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
|
|||||||
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
|
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
|
||||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
|
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
|
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
|
Id cmp_value,
|
||||||
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
|
||||||
|
const auto& buffer = ctx.buffers[handle];
|
||||||
|
if (Sirit::ValidId(buffer.offset)) {
|
||||||
|
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||||
|
}
|
||||||
|
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||||
|
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
|
||||||
|
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||||
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
|
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
|
||||||
|
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||||
|
const auto& buffer = ctx.buffers[handle];
|
||||||
|
if (Sirit::ValidId(buffer.offset)) {
|
||||||
|
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||||
|
}
|
||||||
|
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
|
||||||
|
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
|
||||||
|
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||||
|
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||||
|
return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
|
||||||
|
return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
|
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
|
||||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
@ -89,6 +120,10 @@ Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
|
|||||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
|
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value) {
|
||||||
|
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
|
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
|
||||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
|
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
|
||||||
}
|
}
|
||||||
@ -133,6 +168,10 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
|
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||||
|
return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
|
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
|
||||||
}
|
}
|
||||||
@ -175,6 +214,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
|
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
|
Id cmp_value) {
|
||||||
|
return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
|
||||||
|
&Sirit::Module::OpAtomicCompareExchange);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
|
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
|
||||||
}
|
}
|
||||||
|
48
src/shader_recompiler/backend/spirv/emit_spirv_bounds.h
Normal file
48
src/shader_recompiler/backend/spirv/emit_spirv_bounds.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
|
||||||
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
template <u32 bit_size>
|
||||||
|
auto AccessBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
|
||||||
|
Id zero_value{};
|
||||||
|
Id result_type{};
|
||||||
|
if constexpr (bit_size == 64) {
|
||||||
|
zero_value = ctx.u64_zero_value;
|
||||||
|
result_type = ctx.U64;
|
||||||
|
} else if constexpr (bit_size == 32) {
|
||||||
|
zero_value = ctx.u32_zero_value;
|
||||||
|
result_type = ctx.U32[1];
|
||||||
|
} else if constexpr (bit_size == 16) {
|
||||||
|
zero_value = ctx.u16_zero_value;
|
||||||
|
result_type = ctx.U16;
|
||||||
|
} else {
|
||||||
|
static_assert(false, "type not supported");
|
||||||
|
}
|
||||||
|
if (Sirit::ValidId(buffer_size)) {
|
||||||
|
// Bounds checking enabled, wrap in a conditional branch to make sure that
|
||||||
|
// the atomic is not mistakenly executed when the index is out of bounds.
|
||||||
|
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
|
||||||
|
const Id ib_label = ctx.OpLabel();
|
||||||
|
const Id end_label = ctx.OpLabel();
|
||||||
|
ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
|
||||||
|
ctx.OpBranchConditional(in_bounds, ib_label, end_label);
|
||||||
|
const auto last_label = ctx.last_label;
|
||||||
|
ctx.AddLabel(ib_label);
|
||||||
|
const auto ib_result = emit_func();
|
||||||
|
ctx.OpBranch(end_label);
|
||||||
|
ctx.AddLabel(end_label);
|
||||||
|
if (Sirit::ValidId(ib_result)) {
|
||||||
|
return ctx.OpPhi(result_type, ib_result, ib_label, zero_value, last_label);
|
||||||
|
} else {
|
||||||
|
return Id{0};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Bounds checking not enabled, just perform the atomic operation.
|
||||||
|
return emit_func();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader::Backend::SPIRV
|
@ -86,6 +86,7 @@ void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
|
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
@ -96,6 +97,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
|||||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
|
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
|
Id cmp_value);
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
||||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||||
@ -118,11 +121,14 @@ Id EmitUndefU8(EmitContext& ctx);
|
|||||||
Id EmitUndefU16(EmitContext& ctx);
|
Id EmitUndefU16(EmitContext& ctx);
|
||||||
Id EmitUndefU32(EmitContext& ctx);
|
Id EmitUndefU32(EmitContext& ctx);
|
||||||
Id EmitUndefU64(EmitContext& ctx);
|
Id EmitUndefU64(EmitContext& ctx);
|
||||||
|
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
|
||||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
|
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
|
||||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
|
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
|
||||||
|
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
|
||||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
|
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
|
||||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
|
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
|
||||||
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
|
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
|
||||||
|
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
|
||||||
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
|
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
|
||||||
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
|
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
|
||||||
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
|
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
|
||||||
@ -372,6 +378,7 @@ Id EmitBitCount64(EmitContext& ctx, Id value);
|
|||||||
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
|
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
|
||||||
Id EmitFindSMsb32(EmitContext& ctx, Id value);
|
Id EmitFindSMsb32(EmitContext& ctx, Id value);
|
||||||
Id EmitFindUMsb32(EmitContext& ctx, Id value);
|
Id EmitFindUMsb32(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFindUMsb64(EmitContext& ctx, Id value);
|
||||||
Id EmitFindILsb32(EmitContext& ctx, Id value);
|
Id EmitFindILsb32(EmitContext& ctx, Id value);
|
||||||
Id EmitFindILsb64(EmitContext& ctx, Id value);
|
Id EmitFindILsb64(EmitContext& ctx, Id value);
|
||||||
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
|
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
|
||||||
|
@ -229,6 +229,20 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
|
|||||||
return ctx.OpFindUMsb(ctx.U32[1], value);
|
return ctx.OpFindUMsb(ctx.U32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitFindUMsb64(EmitContext& ctx, Id value) {
|
||||||
|
// Vulkan restricts some bitwise operations to 32-bit only, so decompose into
|
||||||
|
// two 32-bit values and select the correct result.
|
||||||
|
const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
|
||||||
|
const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
|
||||||
|
const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
|
||||||
|
const Id hi_msb{ctx.OpFindUMsb(ctx.U32[1], hi)};
|
||||||
|
const Id lo_msb{ctx.OpFindUMsb(ctx.U32[1], lo)};
|
||||||
|
const Id found_hi{ctx.OpINotEqual(ctx.U1[1], hi_msb, ctx.ConstU32(u32(-1)))};
|
||||||
|
const Id shifted_hi{ctx.OpIAdd(ctx.U32[1], hi_msb, ctx.ConstU32(32u))};
|
||||||
|
// value == 0 case is checked in IREmitter
|
||||||
|
return ctx.OpSelect(ctx.U32[1], found_hi, shifted_hi, lo_msb);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitFindILsb32(EmitContext& ctx, Id value) {
|
Id EmitFindILsb32(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpFindILsb(ctx.U32[1], value);
|
return ctx.OpFindILsb(ctx.U32[1], value);
|
||||||
}
|
}
|
||||||
|
@ -1,43 +1,86 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/div_ceil.h"
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||||
|
const Id shift_id{ctx.ConstU32(1U)};
|
||||||
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
||||||
|
|
||||||
|
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
const Id pointer =
|
||||||
|
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
|
||||||
|
return ctx.OpLoad(ctx.U16, pointer);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(2U)};
|
||||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
|
||||||
|
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
const Id pointer =
|
||||||
|
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
||||||
|
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||||
const Id shift_id{ctx.ConstU32(2U)};
|
const Id shift_id{ctx.ConstU32(3U)};
|
||||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
|
||||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
const Id pointer{
|
||||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
||||||
|
return ctx.OpLoad(ctx.U64, pointer);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||||
|
const Id shift{ctx.ConstU32(1U)};
|
||||||
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
|
||||||
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
|
||||||
|
|
||||||
|
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
const Id pointer =
|
||||||
|
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
return Id{0};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||||
const Id shift{ctx.ConstU32(2U)};
|
const Id shift{ctx.ConstU32(2U)};
|
||||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
|
||||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
|
||||||
ctx.OpStore(pointer, value);
|
|
||||||
|
AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
|
const Id pointer =
|
||||||
|
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
return Id{0};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||||
const Id shift{ctx.ConstU32(2U)};
|
const Id shift{ctx.ConstU32(3U)};
|
||||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
|
||||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
|
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
|
||||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
const Id pointer{
|
||||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
return Id{0};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
@ -146,6 +146,7 @@ void EmitContext::DefineArithmeticTypes() {
|
|||||||
false_value = ConstantFalse(U1[1]);
|
false_value = ConstantFalse(U1[1]);
|
||||||
u8_one_value = Constant(U8, 1U);
|
u8_one_value = Constant(U8, 1U);
|
||||||
u8_zero_value = Constant(U8, 0U);
|
u8_zero_value = Constant(U8, 0U);
|
||||||
|
u16_zero_value = Constant(U16, 0U);
|
||||||
u32_one_value = ConstU32(1U);
|
u32_one_value = ConstU32(1U);
|
||||||
u32_zero_value = ConstU32(0U);
|
u32_zero_value = ConstU32(0U);
|
||||||
f32_zero_value = ConstF32(0.0f);
|
f32_zero_value = ConstF32(0.0f);
|
||||||
@ -285,6 +286,8 @@ void EmitContext::DefineBufferProperties() {
|
|||||||
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
|
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
|
||||||
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
|
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
|
||||||
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
|
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
|
||||||
|
buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
|
||||||
|
Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -307,7 +310,9 @@ void EmitContext::DefineInterpolatedAttribs() {
|
|||||||
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
|
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
|
||||||
const Id p10{OpFSub(F32[4], p1, p0)};
|
const Id p10{OpFSub(F32[4], p1, p0)};
|
||||||
const Id p20{OpFSub(F32[4], p2, p0)};
|
const Id p20{OpFSub(F32[4], p2, p0)};
|
||||||
const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
|
const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
|
||||||
|
? bary_coord_linear_id
|
||||||
|
: bary_coord_persp_id)};
|
||||||
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
|
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
|
||||||
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
|
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
|
||||||
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
|
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
|
||||||
@ -411,8 +416,14 @@ void EmitContext::DefineInputs() {
|
|||||||
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||||
}
|
}
|
||||||
if (profile.needs_manual_interpolation) {
|
if (profile.needs_manual_interpolation) {
|
||||||
gl_bary_coord_id =
|
if (info.has_perspective_interp) {
|
||||||
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
bary_coord_persp_id =
|
||||||
|
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
||||||
|
}
|
||||||
|
if (info.has_linear_interp) {
|
||||||
|
bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
|
||||||
|
spv::StorageClass::Input);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
||||||
const auto& input = runtime_info.fs_info.inputs[i];
|
const auto& input = runtime_info.fs_info.inputs[i];
|
||||||
@ -435,9 +446,12 @@ void EmitContext::DefineInputs() {
|
|||||||
} else {
|
} else {
|
||||||
attr_id = DefineInput(type, semantic);
|
attr_id = DefineInput(type, semantic);
|
||||||
Name(attr_id, fmt::format("fs_in_attr{}", semantic));
|
Name(attr_id, fmt::format("fs_in_attr{}", semantic));
|
||||||
}
|
|
||||||
if (input.is_flat) {
|
if (input.is_flat) {
|
||||||
Decorate(attr_id, spv::Decoration::Flat);
|
Decorate(attr_id, spv::Decoration::Flat);
|
||||||
|
} else if (IsLinear(info.interp_qualifiers[i])) {
|
||||||
|
Decorate(attr_id, spv::Decoration::NoPerspective);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
input_params[semantic] =
|
input_params[semantic] =
|
||||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
||||||
@ -634,7 +648,8 @@ void EmitContext::DefineOutputs() {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LogicalStage::Fragment:
|
case LogicalStage::Fragment: {
|
||||||
|
u32 num_render_targets = 0;
|
||||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||||
if (!info.stores.GetAny(mrt)) {
|
if (!info.stores.GetAny(mrt)) {
|
||||||
@ -643,11 +658,21 @@ void EmitContext::DefineOutputs() {
|
|||||||
const u32 num_components = info.stores.NumComponents(mrt);
|
const u32 num_components = info.stores.NumComponents(mrt);
|
||||||
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
|
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
|
||||||
const Id type{GetAttributeType(*this, num_format)[num_components]};
|
const Id type{GetAttributeType(*this, num_format)[num_components]};
|
||||||
const Id id{DefineOutput(type, i)};
|
Id id;
|
||||||
|
if (runtime_info.fs_info.dual_source_blending) {
|
||||||
|
id = DefineOutput(type, 0);
|
||||||
|
Decorate(id, spv::Decoration::Index, i);
|
||||||
|
} else {
|
||||||
|
id = DefineOutput(type, i);
|
||||||
|
}
|
||||||
Name(id, fmt::format("frag_color{}", i));
|
Name(id, fmt::format("frag_color{}", i));
|
||||||
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
||||||
|
++num_render_targets;
|
||||||
}
|
}
|
||||||
|
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
|
||||||
|
"Dual source blending enabled, there must be exactly two MRT exports");
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case LogicalStage::Geometry: {
|
case LogicalStage::Geometry: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
|
|
||||||
@ -957,13 +982,27 @@ void EmitContext::DefineSharedMemory() {
|
|||||||
}
|
}
|
||||||
ASSERT(info.stage == Stage::Compute);
|
ASSERT(info.stage == Stage::Compute);
|
||||||
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||||
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
|
|
||||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
const auto make_type = [&](Id element_type, u32 element_size) {
|
||||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
|
||||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
||||||
Name(shared_memory_u32, "shared_mem");
|
|
||||||
interfaces.push_back(shared_memory_u32);
|
const Id struct_type{TypeStruct(array_type)};
|
||||||
|
MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
|
||||||
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
|
|
||||||
|
const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
|
||||||
|
const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
|
||||||
|
const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
|
||||||
|
Decorate(variable, spv::Decoration::Aliased);
|
||||||
|
interfaces.push_back(variable);
|
||||||
|
|
||||||
|
return std::make_tuple(variable, element_pointer, pointer);
|
||||||
|
};
|
||||||
|
std::tie(shared_memory_u16, shared_u16, shared_memory_u16_type) = make_type(U16, 2u);
|
||||||
|
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make_type(U32[1], 4u);
|
||||||
|
std::tie(shared_memory_u64, shared_u64, shared_memory_u64_type) = make_type(U64, 8u);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
|
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
|
||||||
|
@ -235,17 +235,16 @@ public:
|
|||||||
Id false_value{};
|
Id false_value{};
|
||||||
Id u8_one_value{};
|
Id u8_one_value{};
|
||||||
Id u8_zero_value{};
|
Id u8_zero_value{};
|
||||||
|
Id u16_zero_value{};
|
||||||
Id u32_one_value{};
|
Id u32_one_value{};
|
||||||
Id u32_zero_value{};
|
Id u32_zero_value{};
|
||||||
Id f32_zero_value{};
|
Id f32_zero_value{};
|
||||||
Id u64_one_value{};
|
Id u64_one_value{};
|
||||||
Id u64_zero_value{};
|
Id u64_zero_value{};
|
||||||
|
|
||||||
Id shared_u8{};
|
|
||||||
Id shared_u16{};
|
Id shared_u16{};
|
||||||
Id shared_u32{};
|
Id shared_u32{};
|
||||||
Id shared_u32x2{};
|
Id shared_u64{};
|
||||||
Id shared_u32x4{};
|
|
||||||
|
|
||||||
Id input_u32{};
|
Id input_u32{};
|
||||||
Id input_f32{};
|
Id input_f32{};
|
||||||
@ -285,16 +284,16 @@ public:
|
|||||||
Id image_u32{};
|
Id image_u32{};
|
||||||
Id image_f32{};
|
Id image_f32{};
|
||||||
|
|
||||||
Id shared_memory_u8{};
|
|
||||||
Id shared_memory_u16{};
|
Id shared_memory_u16{};
|
||||||
Id shared_memory_u32{};
|
Id shared_memory_u32{};
|
||||||
Id shared_memory_u32x2{};
|
Id shared_memory_u64{};
|
||||||
Id shared_memory_u32x4{};
|
|
||||||
|
|
||||||
|
Id shared_memory_u16_type{};
|
||||||
Id shared_memory_u32_type{};
|
Id shared_memory_u32_type{};
|
||||||
|
Id shared_memory_u64_type{};
|
||||||
|
|
||||||
Id interpolate_func{};
|
Id bary_coord_persp_id{};
|
||||||
Id gl_bary_coord_id{};
|
Id bary_coord_linear_id{};
|
||||||
|
|
||||||
struct TextureDefinition {
|
struct TextureDefinition {
|
||||||
const VectorIds* data_types;
|
const VectorIds* data_types;
|
||||||
@ -320,6 +319,7 @@ public:
|
|||||||
Id size;
|
Id size;
|
||||||
Id size_shorts;
|
Id size_shorts;
|
||||||
Id size_dwords;
|
Id size_dwords;
|
||||||
|
Id size_qwords;
|
||||||
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
|
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
|
||||||
|
|
||||||
const BufferSpv& operator[](PointerType alias) const {
|
const BufferSpv& operator[](PointerType alias) const {
|
||||||
|
@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
|
|||||||
|
|
||||||
if (last_attr != IR::Attribute::Position0) {
|
if (last_attr != IR::Attribute::Position0) {
|
||||||
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
|
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
|
||||||
|
const auto it = data.attr_map.begin();
|
||||||
|
const u32 comp_stride = std::next(it)->first - it->first;
|
||||||
|
data.output_vertices = comp_stride / 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <map>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
@ -12,8 +12,9 @@
|
|||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct CopyShaderData {
|
struct CopyShaderData {
|
||||||
std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
|
std::map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
|
||||||
u32 num_attrs{0};
|
u32 num_attrs{0};
|
||||||
|
u32 output_vertices{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
CopyShaderData ParseCopyShader(std::span<const u32> code);
|
CopyShaderData ParseCopyShader(std::span<const u32> code);
|
||||||
|
@ -605,11 +605,12 @@ public:
|
|||||||
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
||||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
|
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
|
||||||
runtime_info{runtime_info_}, profile{profile_} {
|
runtime_info{runtime_info_}, profile{profile_},
|
||||||
|
translator{info_, runtime_info_, profile_} {
|
||||||
Visit(root_stmt, nullptr, nullptr);
|
Visit(root_stmt, nullptr, nullptr);
|
||||||
|
|
||||||
IR::Block& first_block{*syntax_list.front().data.block};
|
IR::Block* first_block = syntax_list.front().data.block;
|
||||||
Translator{&first_block, info, runtime_info, profile}.EmitPrologue();
|
translator.EmitPrologue(first_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -637,8 +638,8 @@ private:
|
|||||||
current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
|
current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
|
||||||
const u32 start = stmt.block->begin_index;
|
const u32 start = stmt.block->begin_index;
|
||||||
const u32 size = stmt.block->end_index - start + 1;
|
const u32 size = stmt.block->end_index - start + 1;
|
||||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
translator.Translate(current_block, stmt.block->begin,
|
||||||
info, runtime_info, profile);
|
inst_list.subspan(start, size));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -820,6 +821,7 @@ private:
|
|||||||
Info& info;
|
Info& info;
|
||||||
const RuntimeInfo& runtime_info;
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
|
Translator translator;
|
||||||
};
|
};
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||||||
// DS
|
// DS
|
||||||
case Opcode::DS_ADD_U32:
|
case Opcode::DS_ADD_U32:
|
||||||
return DS_ADD_U32(inst, false);
|
return DS_ADD_U32(inst, false);
|
||||||
|
case Opcode::DS_ADD_U64:
|
||||||
|
return DS_ADD_U64(inst, false);
|
||||||
case Opcode::DS_SUB_U32:
|
case Opcode::DS_SUB_U32:
|
||||||
return DS_SUB_U32(inst, false);
|
return DS_SUB_U32(inst, false);
|
||||||
case Opcode::DS_INC_U32:
|
case Opcode::DS_INC_U32:
|
||||||
@ -61,10 +63,14 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||||||
return DS_READ(32, false, true, false, inst);
|
return DS_READ(32, false, true, false, inst);
|
||||||
case Opcode::DS_READ2ST64_B32:
|
case Opcode::DS_READ2ST64_B32:
|
||||||
return DS_READ(32, false, true, true, inst);
|
return DS_READ(32, false, true, true, inst);
|
||||||
|
case Opcode::DS_READ_U16:
|
||||||
|
return DS_READ(16, false, false, false, inst);
|
||||||
case Opcode::DS_CONSUME:
|
case Opcode::DS_CONSUME:
|
||||||
return DS_CONSUME(inst);
|
return DS_CONSUME(inst);
|
||||||
case Opcode::DS_APPEND:
|
case Opcode::DS_APPEND:
|
||||||
return DS_APPEND(inst);
|
return DS_APPEND(inst);
|
||||||
|
case Opcode::DS_WRITE_B16:
|
||||||
|
return DS_WRITE(16, false, false, false, inst);
|
||||||
case Opcode::DS_WRITE_B64:
|
case Opcode::DS_WRITE_B64:
|
||||||
return DS_WRITE(64, false, false, false, inst);
|
return DS_WRITE(64, false, false, false, inst);
|
||||||
case Opcode::DS_WRITE2_B64:
|
case Opcode::DS_WRITE2_B64:
|
||||||
@ -123,6 +129,18 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
|
||||||
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||||
|
const IR::U64 data{GetSrc64(inst.src[1])};
|
||||||
|
const IR::U32 offset =
|
||||||
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||||
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||||
|
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||||
|
if (rtn) {
|
||||||
|
SetDst64(inst.dst[0], IR::U64{original_val});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
||||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
const IR::U32 data{GetSrc(inst.src[1])};
|
||||||
@ -201,23 +219,28 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
|
|||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||||
} else {
|
} else {
|
||||||
ir.WriteShared(
|
ir.WriteShared(64,
|
||||||
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
|
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
|
||||||
addr0);
|
ir.GetVectorReg(data0 + 1))),
|
||||||
|
addr0);
|
||||||
}
|
}
|
||||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||||
} else {
|
} else {
|
||||||
ir.WriteShared(
|
ir.WriteShared(64,
|
||||||
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
|
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
|
||||||
addr1);
|
ir.GetVectorReg(data1 + 1))),
|
||||||
|
addr1);
|
||||||
}
|
}
|
||||||
} else if (bit_size == 64) {
|
} else if (bit_size == 64) {
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
const IR::Value data =
|
const IR::Value data =
|
||||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
||||||
ir.WriteShared(bit_size, data, addr0);
|
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
|
||||||
|
} else if (bit_size == 16) {
|
||||||
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
|
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||||
} else {
|
} else {
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||||
@ -289,22 +312,29 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
|
|||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
||||||
} else {
|
} else {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
|
const auto vector = ir.UnpackUint2x32(IR::U64{data0});
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||||
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||||
}
|
}
|
||||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||||
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
||||||
if (bit_size == 32) {
|
if (bit_size == 32) {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
||||||
} else {
|
} else {
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
|
const auto vector = ir.UnpackUint2x32(IR::U64{data1});
|
||||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||||
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||||
}
|
}
|
||||||
} else if (bit_size == 64) {
|
} else if (bit_size == 64) {
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
||||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
const auto vector = ir.UnpackUint2x32(IR::U64{data});
|
||||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||||
|
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||||
|
} else if (bit_size == 16) {
|
||||||
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
|
const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.UConvert(32, data));
|
||||||
} else {
|
} else {
|
||||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||||
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
||||||
|
@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
|
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
|
||||||
const u32 color_buffer_idx =
|
u32 color_buffer_idx =
|
||||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||||
|
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
||||||
|
color_buffer_idx = 0;
|
||||||
|
}
|
||||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||||
|
|
||||||
AmdGpu::NumberFormat num_format;
|
AmdGpu::NumberFormat num_format;
|
||||||
@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
||||||
const u32 color_buffer_idx =
|
u32 color_buffer_idx =
|
||||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||||
|
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
||||||
|
color_buffer_idx = 0;
|
||||||
|
}
|
||||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||||
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
|
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
|
||||||
|
|
||||||
|
@ -114,6 +114,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
return S_FF1_I32_B64(inst);
|
return S_FF1_I32_B64(inst);
|
||||||
case Opcode::S_FLBIT_I32_B32:
|
case Opcode::S_FLBIT_I32_B32:
|
||||||
return S_FLBIT_I32_B32(inst);
|
return S_FLBIT_I32_B32(inst);
|
||||||
|
case Opcode::S_FLBIT_I32_B64:
|
||||||
|
return S_FLBIT_I32_B64(inst);
|
||||||
case Opcode::S_BITSET0_B32:
|
case Opcode::S_BITSET0_B32:
|
||||||
return S_BITSET_B32(inst, 0);
|
return S_BITSET_B32(inst, 0);
|
||||||
case Opcode::S_BITSET1_B32:
|
case Opcode::S_BITSET1_B32:
|
||||||
@ -686,6 +688,17 @@ void Translator::S_FLBIT_I32_B32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
|
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_FLBIT_I32_B64(const GcnInst& inst) {
|
||||||
|
const IR::U64 src0{GetSrc64(inst.src[0])};
|
||||||
|
// Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB)
|
||||||
|
// position
|
||||||
|
const IR::U32 msb_pos = ir.FindUMsb(src0);
|
||||||
|
const IR::U32 pos_from_left = ir.ISub(ir.Imm32(63), msb_pos);
|
||||||
|
// Select 0xFFFFFFFF if src0 was 0
|
||||||
|
const IR::U1 cond = ir.INotEqual(src0, ir.Imm64(u64(0u)));
|
||||||
|
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
|
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
|
||||||
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
||||||
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
|
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
|
||||||
|
@ -21,16 +21,60 @@
|
|||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
static u32 next_vgpr_num;
|
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
||||||
static std::unordered_map<u32, IR::VectorReg> vgpr_map;
|
: info{info_}, runtime_info{runtime_info_}, profile{profile_},
|
||||||
|
next_vgpr_num{runtime_info.num_allocated_vgprs} {
|
||||||
Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
|
if (info.l_stage == LogicalStage::Fragment) {
|
||||||
const Profile& profile_)
|
dst_frag_vreg = GatherInterpQualifiers();
|
||||||
: ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {
|
}
|
||||||
next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::EmitPrologue() {
|
IR::VectorReg Translator::GatherInterpQualifiers() {
|
||||||
|
u32 dst_vreg{};
|
||||||
|
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
|
||||||
|
info.has_perspective_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
|
||||||
|
info.has_perspective_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
|
||||||
|
info.has_perspective_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
|
||||||
|
++dst_vreg; // I/W
|
||||||
|
++dst_vreg; // J/W
|
||||||
|
++dst_vreg; // 1/W
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
|
||||||
|
info.has_linear_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
|
||||||
|
info.has_linear_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
|
||||||
|
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
|
||||||
|
info.has_linear_interp = true;
|
||||||
|
}
|
||||||
|
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
|
||||||
|
++dst_vreg;
|
||||||
|
}
|
||||||
|
return IR::VectorReg(dst_vreg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::EmitPrologue(IR::Block* first_block) {
|
||||||
|
ir = IR::IREmitter(*first_block, first_block->begin());
|
||||||
|
|
||||||
ir.Prologue();
|
ir.Prologue();
|
||||||
ir.SetExec(ir.Imm1(true));
|
ir.SetExec(ir.Imm1(true));
|
||||||
|
|
||||||
@ -60,39 +104,7 @@ void Translator::EmitPrologue() {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LogicalStage::Fragment:
|
case LogicalStage::Fragment:
|
||||||
dst_vreg = IR::VectorReg::V0;
|
dst_vreg = dst_frag_vreg;
|
||||||
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
|
|
||||||
++dst_vreg; // I/W
|
|
||||||
++dst_vreg; // J/W
|
|
||||||
++dst_vreg; // 1/W
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
|
|
||||||
++dst_vreg; // I
|
|
||||||
++dst_vreg; // J
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
|
|
||||||
++dst_vreg;
|
|
||||||
}
|
|
||||||
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
|
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
|
||||||
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
|
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
|
||||||
@ -543,6 +555,26 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
|
|||||||
info.translation_failed = true;
|
info.translation_failed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list) {
|
||||||
|
if (inst_list.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ir = IR::IREmitter{*block, block->begin()};
|
||||||
|
for (const auto& inst : inst_list) {
|
||||||
|
pc += inst.length;
|
||||||
|
|
||||||
|
// Special case for emitting fetch shader.
|
||||||
|
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||||
|
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
|
||||||
|
info.stage == Stage::Local);
|
||||||
|
EmitFetch(inst);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
TranslateInstruction(inst, pc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
|
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
|
||||||
// Emit instructions for each category.
|
// Emit instructions for each category.
|
||||||
switch (inst.category) {
|
switch (inst.category) {
|
||||||
@ -577,25 +609,4 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
|
|
||||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
|
||||||
if (inst_list.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Translator translator{block, info, runtime_info, profile};
|
|
||||||
for (const auto& inst : inst_list) {
|
|
||||||
pc += inst.length;
|
|
||||||
|
|
||||||
// Special case for emitting fetch shader.
|
|
||||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
|
||||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
|
|
||||||
info.stage == Stage::Local);
|
|
||||||
translator.EmitFetch(inst);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
translator.TranslateInstruction(inst, pc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
|
#include <unordered_map>
|
||||||
#include "shader_recompiler/frontend/instruction.h"
|
#include "shader_recompiler/frontend/instruction.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
@ -53,15 +54,17 @@ enum class NegateMode : u32 {
|
|||||||
Result,
|
Result,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static constexpr size_t MaxInterpVgpr = 16;
|
||||||
|
|
||||||
class Translator {
|
class Translator {
|
||||||
public:
|
public:
|
||||||
explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
|
explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);
|
||||||
const Profile& profile);
|
|
||||||
|
|
||||||
|
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list);
|
||||||
void TranslateInstruction(const GcnInst& inst, u32 pc);
|
void TranslateInstruction(const GcnInst& inst, u32 pc);
|
||||||
|
|
||||||
// Instruction categories
|
// Instruction categories
|
||||||
void EmitPrologue();
|
void EmitPrologue(IR::Block* first_block);
|
||||||
void EmitFetch(const GcnInst& inst);
|
void EmitFetch(const GcnInst& inst);
|
||||||
void EmitExport(const GcnInst& inst);
|
void EmitExport(const GcnInst& inst);
|
||||||
void EmitFlowControl(u32 pc, const GcnInst& inst);
|
void EmitFlowControl(u32 pc, const GcnInst& inst);
|
||||||
@ -121,6 +124,7 @@ public:
|
|||||||
void S_FF1_I32_B32(const GcnInst& inst);
|
void S_FF1_I32_B32(const GcnInst& inst);
|
||||||
void S_FF1_I32_B64(const GcnInst& inst);
|
void S_FF1_I32_B64(const GcnInst& inst);
|
||||||
void S_FLBIT_I32_B32(const GcnInst& inst);
|
void S_FLBIT_I32_B32(const GcnInst& inst);
|
||||||
|
void S_FLBIT_I32_B64(const GcnInst& inst);
|
||||||
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
|
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
|
||||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||||
@ -204,6 +208,7 @@ public:
|
|||||||
void V_EXP_F32(const GcnInst& inst);
|
void V_EXP_F32(const GcnInst& inst);
|
||||||
void V_LOG_F32(const GcnInst& inst);
|
void V_LOG_F32(const GcnInst& inst);
|
||||||
void V_RCP_F32(const GcnInst& inst);
|
void V_RCP_F32(const GcnInst& inst);
|
||||||
|
void V_RCP_LEGACY_F32(const GcnInst& inst);
|
||||||
void V_RCP_F64(const GcnInst& inst);
|
void V_RCP_F64(const GcnInst& inst);
|
||||||
void V_RSQ_F32(const GcnInst& inst);
|
void V_RSQ_F32(const GcnInst& inst);
|
||||||
void V_SQRT_F32(const GcnInst& inst);
|
void V_SQRT_F32(const GcnInst& inst);
|
||||||
@ -266,6 +271,7 @@ public:
|
|||||||
// Data share
|
// Data share
|
||||||
// DS
|
// DS
|
||||||
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
||||||
|
void DS_ADD_U64(const GcnInst& inst, bool rtn);
|
||||||
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||||
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||||
@ -324,16 +330,18 @@ private:
|
|||||||
void LogMissingOpcode(const GcnInst& inst);
|
void LogMissingOpcode(const GcnInst& inst);
|
||||||
|
|
||||||
IR::VectorReg GetScratchVgpr(u32 offset);
|
IR::VectorReg GetScratchVgpr(u32 offset);
|
||||||
|
IR::VectorReg GatherInterpQualifiers();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
Info& info;
|
Info& info;
|
||||||
const RuntimeInfo& runtime_info;
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
|
u32 next_vgpr_num;
|
||||||
|
std::unordered_map<u32, IR::VectorReg> vgpr_map;
|
||||||
|
std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{};
|
||||||
|
IR::VectorReg dst_frag_vreg{};
|
||||||
bool opcode_missing = false;
|
bool opcode_missing = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
|
||||||
const RuntimeInfo& runtime_info, const Profile& profile);
|
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
@ -158,6 +158,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||||||
return V_LOG_F32(inst);
|
return V_LOG_F32(inst);
|
||||||
case Opcode::V_RCP_F32:
|
case Opcode::V_RCP_F32:
|
||||||
return V_RCP_F32(inst);
|
return V_RCP_F32(inst);
|
||||||
|
case Opcode::V_RCP_LEGACY_F32:
|
||||||
|
return V_RCP_LEGACY_F32(inst);
|
||||||
case Opcode::V_RCP_F64:
|
case Opcode::V_RCP_F64:
|
||||||
return V_RCP_F64(inst);
|
return V_RCP_F64(inst);
|
||||||
case Opcode::V_RCP_IFLAG_F32:
|
case Opcode::V_RCP_IFLAG_F32:
|
||||||
@ -798,6 +800,20 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.FPRecip(src0));
|
SetDst(inst.dst[0], ir.FPRecip(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_RCP_LEGACY_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||||
|
const auto result = ir.FPRecip(src0);
|
||||||
|
const auto inf = ir.FPIsInf(result);
|
||||||
|
|
||||||
|
const auto raw_result = ir.ConvertFToU(32, result);
|
||||||
|
const auto sign_bit = ir.ShiftRightLogical(raw_result, ir.Imm32(31u));
|
||||||
|
const auto sign_bit_set = ir.INotEqual(sign_bit, ir.Imm32(0u));
|
||||||
|
const IR::F32 inf_result{ir.Select(sign_bit_set, ir.Imm32(-0.0f), ir.Imm32(0.0f))};
|
||||||
|
const IR::F32 val{ir.Select(inf, inf_result, result)};
|
||||||
|
|
||||||
|
SetDst(inst.dst[0], val);
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_RCP_F64(const GcnInst& inst) {
|
void Translator::V_RCP_F64(const GcnInst& inst) {
|
||||||
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
|
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
|
||||||
SetDst64(inst.dst[0], ir.FPRecip(src0));
|
SetDst64(inst.dst[0], ir.FPRecip(src0));
|
||||||
|
@ -22,13 +22,14 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
|
|||||||
// VINTRP
|
// VINTRP
|
||||||
|
|
||||||
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
||||||
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||||
|
info.interp_qualifiers[attr.param_index] = vgpr_to_interp[inst.src[0].code];
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||||
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
||||||
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||||
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
}
|
}
|
||||||
|
@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||||||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||||
case Opcode::BUFFER_ATOMIC_SWAP:
|
case Opcode::BUFFER_ATOMIC_SWAP:
|
||||||
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
|
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
|
||||||
|
case Opcode::BUFFER_ATOMIC_CMPSWAP:
|
||||||
|
return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
|
||||||
case Opcode::BUFFER_ATOMIC_SMIN:
|
case Opcode::BUFFER_ATOMIC_SMIN:
|
||||||
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
|
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
|
||||||
case Opcode::BUFFER_ATOMIC_UMIN:
|
case Opcode::BUFFER_ATOMIC_UMIN:
|
||||||
@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||||||
switch (op) {
|
switch (op) {
|
||||||
case AtomicOp::Swap:
|
case AtomicOp::Swap:
|
||||||
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
|
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
|
||||||
|
case AtomicOp::CmpSwap: {
|
||||||
|
const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
|
||||||
|
return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
|
||||||
|
}
|
||||||
case AtomicOp::Add:
|
case AtomicOp::Add:
|
||||||
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
|
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
|
||||||
case AtomicOp::Smin:
|
case AtomicOp::Smin:
|
||||||
|
@ -193,6 +193,8 @@ struct Info {
|
|||||||
PersistentSrtInfo srt_info;
|
PersistentSrtInfo srt_info;
|
||||||
std::vector<u32> flattened_ud_buf;
|
std::vector<u32> flattened_ud_buf;
|
||||||
|
|
||||||
|
std::array<IR::Interpolation, 32> interp_qualifiers{};
|
||||||
|
|
||||||
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
||||||
s32 tess_consts_dword_offset = -1;
|
s32 tess_consts_dword_offset = -1;
|
||||||
|
|
||||||
@ -206,6 +208,8 @@ struct Info {
|
|||||||
bool has_discard{};
|
bool has_discard{};
|
||||||
bool has_image_gather{};
|
bool has_image_gather{};
|
||||||
bool has_image_query{};
|
bool has_image_query{};
|
||||||
|
bool has_perspective_interp{};
|
||||||
|
bool has_linear_interp{};
|
||||||
bool uses_atomic_float_min_max{};
|
bool uses_atomic_float_min_max{};
|
||||||
bool uses_lane_id{};
|
bool uses_lane_id{};
|
||||||
bool uses_group_quad{};
|
bool uses_group_quad{};
|
||||||
|
@ -83,6 +83,16 @@ enum class Attribute : u64 {
|
|||||||
Max,
|
Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class Interpolation {
|
||||||
|
Invalid = 0,
|
||||||
|
PerspectiveSample = 1,
|
||||||
|
PerspectiveCenter = 2,
|
||||||
|
PerspectiveCentroid = 3,
|
||||||
|
LinearSample = 4,
|
||||||
|
LinearCenter = 5,
|
||||||
|
LinearCentroid = 6,
|
||||||
|
};
|
||||||
|
|
||||||
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
||||||
constexpr size_t NumRenderTargets = 8;
|
constexpr size_t NumRenderTargets = 8;
|
||||||
constexpr size_t NumParams = 32;
|
constexpr size_t NumParams = 32;
|
||||||
@ -104,6 +114,15 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
|
|||||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when `interp` is one of the three linear interpolation modes
/// (sample, center or centroid).
constexpr bool IsLinear(Interpolation interp) noexcept {
    switch (interp) {
    case Interpolation::LinearSample:
    case Interpolation::LinearCenter:
    case Interpolation::LinearCentroid:
        return true;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
|
/// Returns true when `interp` is one of the three perspective interpolation
/// modes (sample, center or centroid).
constexpr bool IsPerspective(Interpolation interp) noexcept {
    switch (interp) {
    case Interpolation::PerspectiveSample:
    case Interpolation::PerspectiveCenter:
    case Interpolation::PerspectiveCentroid:
        return true;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||||
|
|
||||||
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
|
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <bit>
|
|
||||||
#include <source_location>
|
#include <source_location>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
@ -294,10 +293,12 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
|
|||||||
|
|
||||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||||
switch (bit_size) {
|
switch (bit_size) {
|
||||||
|
case 16:
|
||||||
|
return Inst<U16>(Opcode::LoadSharedU16, offset);
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||||
case 64:
|
case 64:
|
||||||
return Inst(Opcode::LoadSharedU64, offset);
|
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||||
}
|
}
|
||||||
@ -305,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
|||||||
|
|
||||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||||
switch (bit_size) {
|
switch (bit_size) {
|
||||||
|
case 16:
|
||||||
|
Inst(Opcode::WriteSharedU16, offset, value);
|
||||||
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
Inst(Opcode::WriteSharedU32, offset, value);
|
Inst(Opcode::WriteSharedU32, offset, value);
|
||||||
break;
|
break;
|
||||||
@ -316,10 +320,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
|
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
|
||||||
switch (data.Type()) {
|
switch (data.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(data.Type());
|
ThrowInvalidType(data.Type());
|
||||||
}
|
}
|
||||||
@ -513,6 +519,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
|
|||||||
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
|
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a BufferAtomicCmpSwap32 instruction.
///
/// `info` is attached as the instruction flags; `handle`, `address`, `vdata`
/// and `cmp_value` are forwarded, in that order, as the instruction arguments.
/// The emitted instruction is returned as a Value.
Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
                                     const Value& cmp_value, BufferInstInfo info) {
    const Value swapped =
        Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
    return swapped;
}
|
||||||
|
|
||||||
U32 IREmitter::DataAppend(const U32& counter) {
|
U32 IREmitter::DataAppend(const U32& counter) {
|
||||||
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
|
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
|
||||||
}
|
}
|
||||||
@ -1546,8 +1557,15 @@ U32 IREmitter::FindSMsb(const U32& value) {
|
|||||||
return Inst<U32>(Opcode::FindSMsb32, value);
|
return Inst<U32>(Opcode::FindSMsb32, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::FindUMsb(const U32& value) {
|
U32 IREmitter::FindUMsb(const U32U64& value) {
|
||||||
return Inst<U32>(Opcode::FindUMsb32, value);
|
switch (value.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(Opcode::FindUMsb32, value);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U32>(Opcode::FindUMsb64, value);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(value.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::FindILsb(const U32U64& value) {
|
U32 IREmitter::FindILsb(const U32U64& value) {
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "shader_recompiler/info.h"
|
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/condition.h"
|
#include "shader_recompiler/ir/condition.h"
|
||||||
@ -17,6 +16,7 @@ namespace Shader::IR {
|
|||||||
|
|
||||||
class IREmitter {
|
class IREmitter {
|
||||||
public:
|
public:
|
||||||
|
explicit IREmitter() = default;
|
||||||
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
|
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
|
||||||
explicit IREmitter(Block& block_, Block::iterator insertion_point_)
|
explicit IREmitter(Block& block_, Block::iterator insertion_point_)
|
||||||
: block{&block_}, insertion_point{insertion_point_} {}
|
: block{&block_}, insertion_point{insertion_point_} {}
|
||||||
@ -99,7 +99,7 @@ public:
|
|||||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||||
|
|
||||||
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
|
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
|
||||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
||||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
||||||
@ -150,6 +150,9 @@ public:
|
|||||||
const Value& value, BufferInstInfo info);
|
const Value& value, BufferInstInfo info);
|
||||||
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
|
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
|
||||||
const Value& value, BufferInstInfo info);
|
const Value& value, BufferInstInfo info);
|
||||||
|
[[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
|
||||||
|
const Value& value, const Value& cmp_value,
|
||||||
|
BufferInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] U32 DataAppend(const U32& counter);
|
[[nodiscard]] U32 DataAppend(const U32& counter);
|
||||||
[[nodiscard]] U32 DataConsume(const U32& counter);
|
[[nodiscard]] U32 DataConsume(const U32& counter);
|
||||||
@ -266,7 +269,7 @@ public:
|
|||||||
[[nodiscard]] U32 BitwiseNot(const U32& value);
|
[[nodiscard]] U32 BitwiseNot(const U32& value);
|
||||||
|
|
||||||
[[nodiscard]] U32 FindSMsb(const U32& value);
|
[[nodiscard]] U32 FindSMsb(const U32& value);
|
||||||
[[nodiscard]] U32 FindUMsb(const U32& value);
|
[[nodiscard]] U32 FindUMsb(const U32U64& value);
|
||||||
[[nodiscard]] U32 FindILsb(const U32U64& value);
|
[[nodiscard]] U32 FindILsb(const U32U64& value);
|
||||||
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
|
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
|
||||||
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
|
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
|
||||||
|
@ -30,13 +30,16 @@ OPCODE(EmitVertex, Void,
|
|||||||
OPCODE(EmitPrimitive, Void, )
|
OPCODE(EmitPrimitive, Void, )
|
||||||
|
|
||||||
// Shared memory operations
|
// Shared memory operations
|
||||||
|
OPCODE(LoadSharedU16, U16, U32, )
|
||||||
OPCODE(LoadSharedU32, U32, U32, )
|
OPCODE(LoadSharedU32, U32, U32, )
|
||||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
OPCODE(LoadSharedU64, U64, U32, )
|
||||||
|
OPCODE(WriteSharedU16, Void, U32, U16, )
|
||||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
OPCODE(WriteSharedU64, Void, U32, U64, )
|
||||||
|
|
||||||
// Shared atomic operations
|
// Shared atomic operations
|
||||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||||
@ -116,6 +119,7 @@ OPCODE(StoreBufferFormatF32, Void, Opaq
|
|||||||
|
|
||||||
// Buffer atomic operations
|
// Buffer atomic operations
|
||||||
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
|
||||||
|
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
|
||||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
||||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
||||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
||||||
@ -126,6 +130,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
|
|||||||
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
|
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
|
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
|
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
|
||||||
|
OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )
|
||||||
|
|
||||||
// Vector utility
|
// Vector utility
|
||||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||||
@ -349,6 +354,7 @@ OPCODE(BitwiseNot32, U32, U32,
|
|||||||
|
|
||||||
OPCODE(FindSMsb32, U32, U32, )
|
OPCODE(FindSMsb32, U32, U32, )
|
||||||
OPCODE(FindUMsb32, U32, U32, )
|
OPCODE(FindUMsb32, U32, U32, )
|
||||||
|
OPCODE(FindUMsb64, U32, U64, )
|
||||||
OPCODE(FindILsb32, U32, U32, )
|
OPCODE(FindILsb32, U32, U32, )
|
||||||
OPCODE(FindILsb64, U32, U64, )
|
OPCODE(FindILsb64, U32, U64, )
|
||||||
OPCODE(SMin32, U32, U32, U32, )
|
OPCODE(SMin32, U32, U32, U32, )
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
|
#include "common/signal_context.h"
|
||||||
|
#include "core/signals.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
@ -24,6 +26,7 @@
|
|||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
static Xbyak::CodeGenerator g_srt_codegen(32_MB);
|
static Xbyak::CodeGenerator g_srt_codegen(32_MB);
|
||||||
|
static const u8* g_srt_codegen_start = nullptr;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -54,6 +57,57 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool SrtWalkerSignalHandler(void* context, void* fault_address) {
|
||||||
|
// Only handle if the fault address is within the SRT code range
|
||||||
|
const u8* code_start = g_srt_codegen_start;
|
||||||
|
const u8* code_end = code_start + g_srt_codegen.getSize();
|
||||||
|
const void* code = Common::GetRip(context);
|
||||||
|
if (code < code_start || code >= code_end) {
|
||||||
|
return false; // Not in SRT code range
|
||||||
|
}
|
||||||
|
|
||||||
|
// Patch instruction to zero register
|
||||||
|
ZydisDecodedInstruction instruction;
|
||||||
|
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||||
|
ZyanStatus status = Common::Decoder::Instance()->decodeInstruction(instruction, operands,
|
||||||
|
const_cast<void*>(code), 15);
|
||||||
|
|
||||||
|
ASSERT(ZYAN_SUCCESS(status) && instruction.mnemonic == ZYDIS_MNEMONIC_MOV &&
|
||||||
|
operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
|
||||||
|
operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY);
|
||||||
|
|
||||||
|
size_t len = instruction.length;
|
||||||
|
const size_t patch_size = 3;
|
||||||
|
u8* code_patch = const_cast<u8*>(reinterpret_cast<const u8*>(code));
|
||||||
|
|
||||||
|
// We can only encounter rdi or r10d as the first operand in a
|
||||||
|
// fault memory access for SRT walker.
|
||||||
|
switch (operands[0].reg.value) {
|
||||||
|
case ZYDIS_REGISTER_RDI:
|
||||||
|
// mov rdi, [rdi + (off_dw << 2)] -> xor rdi, rdi
|
||||||
|
code_patch[0] = 0x48;
|
||||||
|
code_patch[1] = 0x31;
|
||||||
|
code_patch[2] = 0xFF;
|
||||||
|
break;
|
||||||
|
case ZYDIS_REGISTER_R10D:
|
||||||
|
// mov r10d, [rdi + (off_dw << 2)] -> xor r10d, r10d
|
||||||
|
code_patch[0] = 0x45;
|
||||||
|
code_patch[1] = 0x31;
|
||||||
|
code_patch[2] = 0xD2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Unsupported register for SRT walker patch");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill nops
|
||||||
|
memset(code_patch + patch_size, 0x90, len - patch_size);
|
||||||
|
|
||||||
|
LOG_DEBUG(Render_Recompiler, "Patched SRT walker at {}", code);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
using namespace Shader;
|
using namespace Shader;
|
||||||
|
|
||||||
struct PassInfo {
|
struct PassInfo {
|
||||||
@ -141,6 +195,15 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Register the signal handler for SRT walker, if not already registered
|
||||||
|
if (g_srt_codegen_start == nullptr) {
|
||||||
|
g_srt_codegen_start = c.getCurr();
|
||||||
|
auto* signals = Core::Signals::Instance();
|
||||||
|
// Call after the memory invalidation handler
|
||||||
|
constexpr u32 priority = 1;
|
||||||
|
signals->RegisterAccessViolationHandler(SrtWalkerSignalHandler, priority);
|
||||||
|
}
|
||||||
|
|
||||||
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
|
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
|
||||||
|
|
||||||
pass_info.dst_off_dw = NumUserDataRegs;
|
pass_info.dst_off_dw = NumUserDataRegs;
|
||||||
|
@ -15,7 +15,7 @@ struct FormatInfo {
|
|||||||
AmdGpu::NumberFormat num_format;
|
AmdGpu::NumberFormat num_format;
|
||||||
AmdGpu::CompMapping swizzle;
|
AmdGpu::CompMapping swizzle;
|
||||||
AmdGpu::NumberConversion num_conversion;
|
AmdGpu::NumberConversion num_conversion;
|
||||||
int num_components;
|
u32 num_components;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool IsBufferFormatLoad(const IR::Inst& inst) {
|
static bool IsBufferFormatLoad(const IR::Inst& inst) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
@ -39,11 +39,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
ASSERT(addr->Arg(1).IsImmediate());
|
ASSERT(addr->Arg(1).IsImmediate());
|
||||||
offset = addr->Arg(1).U32();
|
offset = addr->Arg(1).U32();
|
||||||
}
|
}
|
||||||
IR::Value data = inst.Arg(1).Resolve();
|
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
|
||||||
|
: inst.Arg(1).Resolve();
|
||||||
for (s32 i = 0; i < num_components; i++) {
|
for (s32 i = 0; i < num_components; i++) {
|
||||||
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||||
const auto comp = (offset / 4) % 4;
|
const auto comp = (offset / 4) % 4;
|
||||||
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
|
const IR::U32 value =
|
||||||
|
IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
|
||||||
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||||
offset += 4;
|
offset += 4;
|
||||||
}
|
}
|
||||||
@ -91,6 +93,19 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
const auto& gs_info = runtime_info.gs_info;
|
const auto& gs_info = runtime_info.gs_info;
|
||||||
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
|
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
|
||||||
|
|
||||||
|
u32 output_vertices = gs_info.output_vertices;
|
||||||
|
if (info.gs_copy_data.output_vertices &&
|
||||||
|
info.gs_copy_data.output_vertices != output_vertices) {
|
||||||
|
ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices &&
|
||||||
|
gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG,
|
||||||
|
"Invalid geometry shader vertex configuration scenario = {}, max_vert_out = "
|
||||||
|
"{}, output_vertices = {}",
|
||||||
|
u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices);
|
||||||
|
LOG_WARNING(Render_Vulkan, "MAX_VERT_OUT {} is larger than actual output vertices {}",
|
||||||
|
output_vertices, info.gs_copy_data.output_vertices);
|
||||||
|
output_vertices = info.gs_copy_data.output_vertices;
|
||||||
|
}
|
||||||
|
|
||||||
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
|
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
|
||||||
const auto opcode = inst.GetOpcode();
|
const auto opcode = inst.GetOpcode();
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
@ -122,7 +137,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
|
|
||||||
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
||||||
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
||||||
const auto comp_ofs = gs_info.output_vertices * 4u;
|
const auto comp_ofs = output_vertices * 4u;
|
||||||
const auto output_size = comp_ofs * gs_info.out_vertex_data_size;
|
const auto output_size = comp_ofs * gs_info.out_vertex_data_size;
|
||||||
|
|
||||||
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
|
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
|
||||||
|
@ -34,8 +34,10 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case IR::Opcode::LoadSharedU16:
|
||||||
case IR::Opcode::LoadSharedU32:
|
case IR::Opcode::LoadSharedU32:
|
||||||
case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
case IR::Opcode::WriteSharedU16:
|
||||||
case IR::Opcode::WriteSharedU32:
|
case IR::Opcode::WriteSharedU32:
|
||||||
case IR::Opcode::WriteSharedU64:
|
case IR::Opcode::WriteSharedU64:
|
||||||
info.uses_shared = true;
|
info.uses_shared = true;
|
||||||
|
@ -16,6 +16,7 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
|||||||
case IR::Opcode::WriteSharedU64:
|
case IR::Opcode::WriteSharedU64:
|
||||||
case IR::Opcode::SharedAtomicAnd32:
|
case IR::Opcode::SharedAtomicAnd32:
|
||||||
case IR::Opcode::SharedAtomicIAdd32:
|
case IR::Opcode::SharedAtomicIAdd32:
|
||||||
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
case IR::Opcode::SharedAtomicOr32:
|
case IR::Opcode::SharedAtomicOr32:
|
||||||
case IR::Opcode::SharedAtomicSMax32:
|
case IR::Opcode::SharedAtomicSMax32:
|
||||||
case IR::Opcode::SharedAtomicUMax32:
|
case IR::Opcode::SharedAtomicUMax32:
|
||||||
@ -33,9 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
if (program.info.stage != Stage::Compute) {
|
if (program.info.stage != Stage::Compute) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Only perform the transform if the host shared memory is insufficient.
|
// Only perform the transform if the host shared memory is insufficient
|
||||||
|
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
|
||||||
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||||
if (shared_memory_size <= profile.max_shared_memory_size) {
|
if (shared_memory_size <= profile.max_shared_memory_size &&
|
||||||
|
profile.supports_workgroup_explicit_memory_layout) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Add buffer binding for shared memory storage buffer.
|
// Add buffer binding for shared memory storage buffer.
|
||||||
@ -60,6 +63,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
|
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicIAdd32:
|
case IR::Opcode::SharedAtomicIAdd32:
|
||||||
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
inst.ReplaceUsesWithAndRemove(
|
inst.ReplaceUsesWithAndRemove(
|
||||||
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
|
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
@ -93,12 +97,19 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
ir.Imm32(shared_memory_size));
|
ir.Imm32(shared_memory_size));
|
||||||
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
|
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
|
case IR::Opcode::LoadSharedU16:
|
||||||
|
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));
|
||||||
|
break;
|
||||||
case IR::Opcode::LoadSharedU32:
|
case IR::Opcode::LoadSharedU32:
|
||||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
|
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(1, handle, address, {}));
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
|
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU32(2, handle, address, {}));
|
||||||
break;
|
break;
|
||||||
|
case IR::Opcode::WriteSharedU16:
|
||||||
|
ir.StoreBufferU16(handle, address, IR::U32{inst.Arg(1)}, {});
|
||||||
|
inst.Invalidate();
|
||||||
|
break;
|
||||||
case IR::Opcode::WriteSharedU32:
|
case IR::Opcode::WriteSharedU32:
|
||||||
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
|
ir.StoreBufferU32(1, handle, address, inst.Arg(1), {});
|
||||||
inst.Invalidate();
|
inst.Invalidate();
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/enum.h"
|
#include "common/enum.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
|
|
||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
|
|
||||||
|
@ -23,13 +23,13 @@ struct Profile {
|
|||||||
bool support_fp32_denorm_preserve{};
|
bool support_fp32_denorm_preserve{};
|
||||||
bool support_fp32_denorm_flush{};
|
bool support_fp32_denorm_flush{};
|
||||||
bool support_fp32_round_to_zero{};
|
bool support_fp32_round_to_zero{};
|
||||||
bool support_explicit_workgroup_layout{};
|
|
||||||
bool support_legacy_vertex_attributes{};
|
bool support_legacy_vertex_attributes{};
|
||||||
bool supports_image_load_store_lod{};
|
bool supports_image_load_store_lod{};
|
||||||
bool supports_native_cube_calc{};
|
bool supports_native_cube_calc{};
|
||||||
bool supports_trinary_minmax{};
|
bool supports_trinary_minmax{};
|
||||||
bool supports_robust_buffer_access{};
|
bool supports_robust_buffer_access{};
|
||||||
bool supports_image_fp32_atomic_min_max{};
|
bool supports_image_fp32_atomic_min_max{};
|
||||||
|
bool supports_workgroup_explicit_memory_layout{};
|
||||||
bool has_broken_spirv_clamp{};
|
bool has_broken_spirv_clamp{};
|
||||||
bool lower_left_origin_mode{};
|
bool lower_left_origin_mode{};
|
||||||
bool needs_manual_interpolation{};
|
bool needs_manual_interpolation{};
|
||||||
|
@ -149,6 +149,7 @@ struct GeometryRuntimeInfo {
|
|||||||
u32 out_vertex_data_size{};
|
u32 out_vertex_data_size{};
|
||||||
AmdGpu::PrimitiveType in_primitive;
|
AmdGpu::PrimitiveType in_primitive;
|
||||||
GsOutputPrimTypes out_primitive;
|
GsOutputPrimTypes out_primitive;
|
||||||
|
AmdGpu::Liverpool::GsMode::Mode mode;
|
||||||
std::span<const u32> vs_copy;
|
std::span<const u32> vs_copy;
|
||||||
u64 vs_copy_hash;
|
u64 vs_copy_hash;
|
||||||
|
|
||||||
@ -196,11 +197,13 @@ struct FragmentRuntimeInfo {
|
|||||||
u32 num_inputs;
|
u32 num_inputs;
|
||||||
std::array<PsInput, 32> inputs;
|
std::array<PsInput, 32> inputs;
|
||||||
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
|
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
|
||||||
|
bool dual_source_blending;
|
||||||
|
|
||||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||||
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
||||||
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
|
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
|
||||||
num_inputs == other.num_inputs &&
|
num_inputs == other.num_inputs &&
|
||||||
|
dual_source_blending == other.dual_source_blending &&
|
||||||
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
|
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
|
||||||
other.inputs.begin() + num_inputs);
|
other.inputs.begin() + num_inputs);
|
||||||
}
|
}
|
||||||
|
@ -228,9 +228,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
const u32 type = header->type;
|
const u32 type = header->type;
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Wrong PM4 type {}", type);
|
||||||
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
UNREACHABLE_MSG("Unimplemented PM4 type 0, base reg: {}, size: {}",
|
||||||
UNREACHABLE_MSG("Unsupported PM4 type {}", type);
|
header->type0.base.Value(), header->type0.NumWords());
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
// Type-2 packet are used for padding purposes
|
// Type-2 packet are used for padding purposes
|
||||||
@ -394,7 +397,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetPredication: {
|
case PM4ItOpcode::SetPredication: {
|
||||||
LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
|
LOG_WARNING(Render, "Unimplemented IT_SET_PREDICATION");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::IndexType: {
|
case PM4ItOpcode::IndexType: {
|
||||||
@ -586,8 +589,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
}
|
}
|
||||||
case PM4ItOpcode::EventWrite: {
|
case PM4ItOpcode::EventWrite: {
|
||||||
const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||||
LOG_DEBUG(Render_Vulkan,
|
LOG_DEBUG(Render, "Encountered EventWrite: event_type = {}, event_index = {}",
|
||||||
"Encountered EventWrite: event_type = {}, event_index = {}",
|
|
||||||
magic_enum::enum_name(event->event_type.Value()),
|
magic_enum::enum_name(event->event_type.Value()),
|
||||||
magic_enum::enum_name(event->event_index.Value()));
|
magic_enum::enum_name(event->event_index.Value()));
|
||||||
if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
|
if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
|
||||||
@ -673,6 +675,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::CopyData: {
|
||||||
|
const auto* copy_data = reinterpret_cast<const PM4CmdCopyData*>(header);
|
||||||
|
LOG_WARNING(Render,
|
||||||
|
"unhandled IT_COPY_DATA src_sel = {}, dst_sel = {}, "
|
||||||
|
"count_sel = {}, wr_confirm = {}, engine_sel = {}",
|
||||||
|
u32(copy_data->src_sel.Value()), u32(copy_data->dst_sel.Value()),
|
||||||
|
copy_data->count_sel.Value(), copy_data->wr_confirm.Value(),
|
||||||
|
u32(copy_data->engine_sel.Value()));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::MemSemaphore: {
|
case PM4ItOpcode::MemSemaphore: {
|
||||||
const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
|
const auto* mem_semaphore = reinterpret_cast<const PM4CmdMemSemaphore*>(header);
|
||||||
if (mem_semaphore->IsSignaling()) {
|
if (mem_semaphore->IsSignaling()) {
|
||||||
@ -756,6 +768,19 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
LOG_WARNING(Render_Vulkan, "Unimplemented IT_GET_LOD_STATS");
|
LOG_WARNING(Render_Vulkan, "Unimplemented IT_GET_LOD_STATS");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::CondExec: {
|
||||||
|
const auto* cond_exec = reinterpret_cast<const PM4CmdCondExec*>(header);
|
||||||
|
if (cond_exec->command.Value() != 0) {
|
||||||
|
LOG_WARNING(Render, "IT_COND_EXEC used a reserved command");
|
||||||
|
}
|
||||||
|
const auto skip = *cond_exec->Address() == false;
|
||||||
|
if (skip) {
|
||||||
|
dcb = NextPacket(dcb,
|
||||||
|
header->type3.NumWords() + 1 + cond_exec->exec_count.Value());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
|
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
|
||||||
static_cast<u32>(opcode), count);
|
static_cast<u32>(opcode), count);
|
||||||
@ -804,6 +829,19 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (header->type == 2) {
|
||||||
|
// Type-2 packet are used for padding purposes
|
||||||
|
next_dw_off = 1;
|
||||||
|
acb += next_dw_off;
|
||||||
|
acb_dwords -= next_dw_off;
|
||||||
|
|
||||||
|
if constexpr (!is_indirect) {
|
||||||
|
*queue.read_addr += next_dw_off;
|
||||||
|
*queue.read_addr %= queue.ring_size_dw;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (header->type != 3) {
|
if (header->type != 3) {
|
||||||
// No other types of packets were spotted so far
|
// No other types of packets were spotted so far
|
||||||
UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value());
|
UNREACHABLE_MSG("Invalid PM4 type {}", header->type.Value());
|
||||||
|
@ -914,7 +914,7 @@ struct Liverpool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t GetColorSliceSize() const {
|
size_t GetColorSliceSize() const {
|
||||||
const auto num_bytes_per_element = NumBits(info.format) / 8u;
|
const auto num_bytes_per_element = NumBitsPerBlock(info.format) / 8u;
|
||||||
const auto slice_size =
|
const auto slice_size =
|
||||||
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
|
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
|
||||||
return slice_size;
|
return slice_size;
|
||||||
@ -1179,8 +1179,16 @@ struct Liverpool {
|
|||||||
};
|
};
|
||||||
|
|
||||||
union GsMode {
|
union GsMode {
|
||||||
|
enum class Mode : u32 {
|
||||||
|
Off = 0,
|
||||||
|
ScenarioA = 1,
|
||||||
|
ScenarioB = 2,
|
||||||
|
ScenarioG = 3,
|
||||||
|
ScenarioC = 4,
|
||||||
|
};
|
||||||
|
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 3, u32> mode;
|
BitField<0, 3, Mode> mode;
|
||||||
BitField<3, 2, u32> cut_mode;
|
BitField<3, 2, u32> cut_mode;
|
||||||
BitField<22, 2, u32> onchip;
|
BitField<22, 2, u32> onchip;
|
||||||
};
|
};
|
||||||
|
@ -111,136 +111,106 @@ std::string_view NameOf(NumberFormat fmt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int NumComponents(DataFormat format) {
|
static constexpr std::array NUM_COMPONENTS = {
|
||||||
constexpr std::array num_components_per_element = {
|
0, // 0 FormatInvalid
|
||||||
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
|
1, // 1 Format8
|
||||||
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,
|
1, // 2 Format16
|
||||||
-1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1};
|
2, // 3 Format8_8
|
||||||
|
1, // 4 Format32
|
||||||
const u32 index = static_cast<u32>(format);
|
2, // 5 Format16_16
|
||||||
if (index >= num_components_per_element.size()) {
|
3, // 6 Format10_11_11
|
||||||
return 0;
|
3, // 7 Format11_11_10
|
||||||
}
|
4, // 8 Format10_10_10_2
|
||||||
return num_components_per_element[index];
|
4, // 9 Format2_10_10_10
|
||||||
}
|
4, // 10 Format8_8_8_8
|
||||||
|
2, // 11 Format32_32
|
||||||
int NumBits(DataFormat format) {
|
4, // 12 Format16_16_16_16
|
||||||
const std::array num_bits_per_element = {
|
3, // 13 Format32_32_32
|
||||||
0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, 16, 16, 16, 16, 32,
|
4, // 14 Format32_32_32_32
|
||||||
32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 32, 4, 8, 8, 4, 8, 8, 8,
|
0, // 15
|
||||||
-1, -1, 8, 8, 8, 8, 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1};
|
3, // 16 Format5_6_5
|
||||||
|
4, // 17 Format1_5_5_5
|
||||||
const u32 index = static_cast<u32>(format);
|
4, // 18 Format5_5_5_1
|
||||||
if (index >= num_bits_per_element.size()) {
|
4, // 19 Format4_4_4_4
|
||||||
return 0;
|
2, // 20 Format8_24
|
||||||
}
|
2, // 21 Format24_8
|
||||||
return num_bits_per_element[index];
|
2, // 22 FormatX24_8_32
|
||||||
}
|
0, // 23
|
||||||
|
0, // 24
|
||||||
static constexpr std::array component_bits = {
|
0, // 25
|
||||||
std::array{0, 0, 0, 0}, // 0 FormatInvalid
|
0, // 26
|
||||||
std::array{8, 0, 0, 0}, // 1 Format8
|
0, // 27
|
||||||
std::array{16, 0, 0, 0}, // 2 Format16
|
0, // 28
|
||||||
std::array{8, 8, 0, 0}, // 3 Format8_8
|
0, // 29
|
||||||
std::array{32, 0, 0, 0}, // 4 Format32
|
0, // 30
|
||||||
std::array{16, 16, 0, 0}, // 5 Format16_16
|
0, // 31
|
||||||
std::array{11, 11, 10, 0}, // 6 Format10_11_11
|
3, // 32 FormatGB_GR
|
||||||
std::array{10, 11, 11, 0}, // 7 Format11_11_10
|
3, // 33 FormatBG_RG
|
||||||
std::array{2, 10, 10, 10}, // 8 Format10_10_10_2
|
4, // 34 Format5_9_9_9
|
||||||
std::array{10, 10, 10, 2}, // 9 Format2_10_10_10
|
4, // 35 FormatBc1
|
||||||
std::array{8, 8, 8, 8}, // 10 Format8_8_8_8
|
4, // 36 FormatBc2
|
||||||
std::array{32, 32, 0, 0}, // 11 Format32_32
|
4, // 37 FormatBc3
|
||||||
std::array{16, 16, 16, 16}, // 12 Format16_16_16_16
|
1, // 38 FormatBc4
|
||||||
std::array{32, 32, 32, 0}, // 13 Format32_32_32
|
2, // 39 FormatBc5
|
||||||
std::array{32, 32, 32, 32}, // 14 Format32_32_32_32
|
3, // 40 FormatBc6
|
||||||
std::array{0, 0, 0, 0}, // 15
|
4, // 41 FormatBc7
|
||||||
std::array{5, 6, 5, 0}, // 16 Format5_6_5
|
|
||||||
std::array{5, 5, 5, 1}, // 17 Format1_5_5_5
|
|
||||||
std::array{1, 5, 5, 5}, // 18 Format5_5_5_1
|
|
||||||
std::array{4, 4, 4, 4}, // 19 Format4_4_4_4
|
|
||||||
std::array{24, 8, 0, 0}, // 20 Format8_24
|
|
||||||
std::array{8, 24, 0, 0}, // 21 Format24_8
|
|
||||||
std::array{8, 24, 0, 0}, // 22 FormatX24_8_32
|
|
||||||
std::array{0, 0, 0, 0}, // 23
|
|
||||||
std::array{0, 0, 0, 0}, // 24
|
|
||||||
std::array{0, 0, 0, 0}, // 25
|
|
||||||
std::array{0, 0, 0, 0}, // 26
|
|
||||||
std::array{0, 0, 0, 0}, // 27
|
|
||||||
std::array{0, 0, 0, 0}, // 28
|
|
||||||
std::array{0, 0, 0, 0}, // 29
|
|
||||||
std::array{0, 0, 0, 0}, // 30
|
|
||||||
std::array{0, 0, 0, 0}, // 31
|
|
||||||
std::array{0, 0, 0, 0}, // 32 FormatGB_GR
|
|
||||||
std::array{0, 0, 0, 0}, // 33 FormatBG_RG
|
|
||||||
std::array{0, 0, 0, 0}, // 34 Format5_9_9_9
|
|
||||||
std::array{0, 0, 0, 0}, // 35 FormatBc1
|
|
||||||
std::array{0, 0, 0, 0}, // 36 FormatBc2
|
|
||||||
std::array{0, 0, 0, 0}, // 37 FormatBc3
|
|
||||||
std::array{0, 0, 0, 0}, // 38 FormatBc4
|
|
||||||
std::array{0, 0, 0, 0}, // 39 FormatBc5
|
|
||||||
std::array{0, 0, 0, 0}, // 40 FormatBc6
|
|
||||||
std::array{0, 0, 0, 0}, // 41 FormatBc7
|
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 ComponentBits(DataFormat format, u32 comp) {
|
u32 NumComponents(DataFormat format) {
|
||||||
const u32 index = static_cast<u32>(format);
|
const u32 index = static_cast<u32>(format);
|
||||||
if (index >= component_bits.size() || comp >= 4) {
|
ASSERT_MSG(index < NUM_COMPONENTS.size(), "Invalid data format = {}", format);
|
||||||
return 0;
|
return NUM_COMPONENTS[index];
|
||||||
}
|
|
||||||
return component_bits[index][comp];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr std::array component_offset = {
|
static constexpr std::array BITS_PER_BLOCK = {
|
||||||
std::array{-1, -1, -1, -1}, // 0 FormatInvalid
|
0, // 0 FormatInvalid
|
||||||
std::array{0, -1, -1, -1}, // 1 Format8
|
8, // 1 Format8
|
||||||
std::array{0, -1, -1, -1}, // 2 Format16
|
16, // 2 Format16
|
||||||
std::array{0, 8, -1, -1}, // 3 Format8_8
|
16, // 3 Format8_8
|
||||||
std::array{0, -1, -1, -1}, // 4 Format32
|
32, // 4 Format32
|
||||||
std::array{0, 16, -1, -1}, // 5 Format16_16
|
32, // 5 Format16_16
|
||||||
std::array{0, 11, 22, -1}, // 6 Format10_11_11
|
32, // 6 Format10_11_11
|
||||||
std::array{0, 10, 21, -1}, // 7 Format11_11_10
|
32, // 7 Format11_11_10
|
||||||
std::array{0, 2, 12, 22}, // 8 Format10_10_10_2
|
32, // 8 Format10_10_10_2
|
||||||
std::array{0, 10, 20, 30}, // 9 Format2_10_10_10
|
32, // 9 Format2_10_10_10
|
||||||
std::array{0, 8, 16, 24}, // 10 Format8_8_8_8
|
32, // 10 Format8_8_8_8
|
||||||
std::array{0, 32, -1, -1}, // 11 Format32_32
|
64, // 11 Format32_32
|
||||||
std::array{0, 16, 32, 48}, // 12 Format16_16_16_16
|
64, // 12 Format16_16_16_16
|
||||||
std::array{0, 32, 64, -1}, // 13 Format32_32_32
|
96, // 13 Format32_32_32
|
||||||
std::array{0, 32, 64, 96}, // 14 Format32_32_32_32
|
128, // 14 Format32_32_32_32
|
||||||
std::array{-1, -1, -1, -1}, // 15
|
0, // 15
|
||||||
std::array{0, 5, 11, -1}, // 16 Format5_6_5
|
16, // 16 Format5_6_5
|
||||||
std::array{0, 5, 10, 15}, // 17 Format1_5_5_5
|
16, // 17 Format1_5_5_5
|
||||||
std::array{0, 1, 6, 11}, // 18 Format5_5_5_1
|
16, // 18 Format5_5_5_1
|
||||||
std::array{0, 4, 8, 12}, // 19 Format4_4_4_4
|
16, // 19 Format4_4_4_4
|
||||||
std::array{0, 24, -1, -1}, // 20 Format8_24
|
32, // 20 Format8_24
|
||||||
std::array{0, 8, -1, -1}, // 21 Format24_8
|
32, // 21 Format24_8
|
||||||
std::array{0, 8, -1, -1}, // 22 FormatX24_8_32
|
64, // 22 FormatX24_8_32
|
||||||
std::array{-1, -1, -1, -1}, // 23
|
0, // 23
|
||||||
std::array{-1, -1, -1, -1}, // 24
|
0, // 24
|
||||||
std::array{-1, -1, -1, -1}, // 25
|
0, // 25
|
||||||
std::array{-1, -1, -1, -1}, // 26
|
0, // 26
|
||||||
std::array{-1, -1, -1, -1}, // 27
|
0, // 27
|
||||||
std::array{-1, -1, -1, -1}, // 28
|
0, // 28
|
||||||
std::array{-1, -1, -1, -1}, // 29
|
0, // 29
|
||||||
std::array{-1, -1, -1, -1}, // 30
|
0, // 30
|
||||||
std::array{-1, -1, -1, -1}, // 31
|
0, // 31
|
||||||
std::array{-1, -1, -1, -1}, // 32 FormatGB_GR
|
16, // 32 FormatGB_GR
|
||||||
std::array{-1, -1, -1, -1}, // 33 FormatBG_RG
|
16, // 33 FormatBG_RG
|
||||||
std::array{-1, -1, -1, -1}, // 34 Format5_9_9_9
|
32, // 34 Format5_9_9_9
|
||||||
std::array{-1, -1, -1, -1}, // 35 FormatBc1
|
64, // 35 FormatBc1
|
||||||
std::array{-1, -1, -1, -1}, // 36 FormatBc2
|
128, // 36 FormatBc2
|
||||||
std::array{-1, -1, -1, -1}, // 37 FormatBc3
|
128, // 37 FormatBc3
|
||||||
std::array{-1, -1, -1, -1}, // 38 FormatBc4
|
64, // 38 FormatBc4
|
||||||
std::array{-1, -1, -1, -1}, // 39 FormatBc5
|
128, // 39 FormatBc5
|
||||||
std::array{-1, -1, -1, -1}, // 40 FormatBc6
|
128, // 40 FormatBc6
|
||||||
std::array{-1, -1, -1, -1}, // 41 FormatBc7
|
128, // 41 FormatBc7
|
||||||
};
|
};
|
||||||
|
|
||||||
s32 ComponentOffset(DataFormat format, u32 comp) {
|
u32 NumBitsPerBlock(DataFormat format) {
|
||||||
const u32 index = static_cast<u32>(format);
|
const u32 index = static_cast<u32>(format);
|
||||||
if (index >= component_offset.size() || comp >= 4) {
|
ASSERT_MSG(index < BITS_PER_BLOCK.size(), "Invalid data format = {}", format);
|
||||||
return -1;
|
return BITS_PER_BLOCK[index];
|
||||||
}
|
|
||||||
return component_offset[index][comp];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
@ -5,39 +5,313 @@
|
|||||||
|
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
#include "common/assert.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/amdgpu/types.h"
|
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
|
||||||
enum NumberClass {
|
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||||
|
enum class DataFormat : u32 {
|
||||||
|
FormatInvalid = 0,
|
||||||
|
Format8 = 1,
|
||||||
|
Format16 = 2,
|
||||||
|
Format8_8 = 3,
|
||||||
|
Format32 = 4,
|
||||||
|
Format16_16 = 5,
|
||||||
|
Format10_11_11 = 6,
|
||||||
|
Format11_11_10 = 7,
|
||||||
|
Format10_10_10_2 = 8,
|
||||||
|
Format2_10_10_10 = 9,
|
||||||
|
Format8_8_8_8 = 10,
|
||||||
|
Format32_32 = 11,
|
||||||
|
Format16_16_16_16 = 12,
|
||||||
|
Format32_32_32 = 13,
|
||||||
|
Format32_32_32_32 = 14,
|
||||||
|
Format5_6_5 = 16,
|
||||||
|
Format1_5_5_5 = 17,
|
||||||
|
Format5_5_5_1 = 18,
|
||||||
|
Format4_4_4_4 = 19,
|
||||||
|
Format8_24 = 20,
|
||||||
|
Format24_8 = 21,
|
||||||
|
FormatX24_8_32 = 22,
|
||||||
|
FormatGB_GR = 32,
|
||||||
|
FormatBG_RG = 33,
|
||||||
|
Format5_9_9_9 = 34,
|
||||||
|
FormatBc1 = 35,
|
||||||
|
FormatBc2 = 36,
|
||||||
|
FormatBc3 = 37,
|
||||||
|
FormatBc4 = 38,
|
||||||
|
FormatBc5 = 39,
|
||||||
|
FormatBc6 = 40,
|
||||||
|
FormatBc7 = 41,
|
||||||
|
FormatFmask8_1 = 47,
|
||||||
|
FormatFmask8_2 = 48,
|
||||||
|
FormatFmask8_4 = 49,
|
||||||
|
FormatFmask16_1 = 50,
|
||||||
|
FormatFmask16_2 = 51,
|
||||||
|
FormatFmask32_2 = 52,
|
||||||
|
FormatFmask32_4 = 53,
|
||||||
|
FormatFmask32_8 = 54,
|
||||||
|
FormatFmask64_4 = 55,
|
||||||
|
FormatFmask64_8 = 56,
|
||||||
|
Format4_4 = 57,
|
||||||
|
Format6_5_5 = 58,
|
||||||
|
Format1 = 59,
|
||||||
|
Format1_Reversed = 60,
|
||||||
|
Format32_As_8 = 61,
|
||||||
|
Format32_As_8_8 = 62,
|
||||||
|
Format32_As_32_32_32_32 = 63,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class NumberFormat : u32 {
|
||||||
|
Unorm = 0,
|
||||||
|
Snorm = 1,
|
||||||
|
Uscaled = 2,
|
||||||
|
Sscaled = 3,
|
||||||
|
Uint = 4,
|
||||||
|
Sint = 5,
|
||||||
|
SnormNz = 6,
|
||||||
|
Float = 7,
|
||||||
|
Srgb = 9,
|
||||||
|
Ubnorm = 10,
|
||||||
|
UbnormNz = 11,
|
||||||
|
Ubint = 12,
|
||||||
|
Ubscaled = 13,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class NumberClass {
|
||||||
Float,
|
Float,
|
||||||
Sint,
|
Sint,
|
||||||
Uint,
|
Uint,
|
||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
|
enum class CompSwizzle : u8 {
|
||||||
switch (nfmt) {
|
Zero = 0,
|
||||||
case NumberFormat::Sint:
|
One = 1,
|
||||||
return Sint;
|
Red = 4,
|
||||||
case NumberFormat::Uint:
|
Green = 5,
|
||||||
return Uint;
|
Blue = 6,
|
||||||
|
Alpha = 7,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class NumberConversion : u32 {
|
||||||
|
None = 0,
|
||||||
|
UintToUscaled = 1,
|
||||||
|
SintToSscaled = 2,
|
||||||
|
UnormToUbnorm = 3,
|
||||||
|
Sint8ToSnormNz = 4,
|
||||||
|
Sint16ToSnormNz = 5,
|
||||||
|
Uint32ToUnorm = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CompMapping {
|
||||||
|
CompSwizzle r;
|
||||||
|
CompSwizzle g;
|
||||||
|
CompSwizzle b;
|
||||||
|
CompSwizzle a;
|
||||||
|
|
||||||
|
auto operator<=>(const CompMapping& other) const = default;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||||
|
return {
|
||||||
|
ApplySingle(data, r),
|
||||||
|
ApplySingle(data, g),
|
||||||
|
ApplySingle(data, b),
|
||||||
|
ApplySingle(data, a),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] CompMapping Inverse() const {
|
||||||
|
CompMapping result{};
|
||||||
|
InverseSingle(result.r, CompSwizzle::Red);
|
||||||
|
InverseSingle(result.g, CompSwizzle::Green);
|
||||||
|
InverseSingle(result.b, CompSwizzle::Blue);
|
||||||
|
InverseSingle(result.a, CompSwizzle::Alpha);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template <typename T>
|
||||||
|
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||||
|
switch (swizzle) {
|
||||||
|
case CompSwizzle::Zero:
|
||||||
|
return T(0);
|
||||||
|
case CompSwizzle::One:
|
||||||
|
return T(1);
|
||||||
|
case CompSwizzle::Red:
|
||||||
|
return data[0];
|
||||||
|
case CompSwizzle::Green:
|
||||||
|
return data[1];
|
||||||
|
case CompSwizzle::Blue:
|
||||||
|
return data[2];
|
||||||
|
case CompSwizzle::Alpha:
|
||||||
|
return data[3];
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
|
||||||
|
if (r == target) {
|
||||||
|
dst = CompSwizzle::Red;
|
||||||
|
} else if (g == target) {
|
||||||
|
dst = CompSwizzle::Green;
|
||||||
|
} else if (b == target) {
|
||||||
|
dst = CompSwizzle::Blue;
|
||||||
|
} else if (a == target) {
|
||||||
|
dst = CompSwizzle::Alpha;
|
||||||
|
} else {
|
||||||
|
dst = CompSwizzle::Zero;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr CompMapping IdentityMapping = {
|
||||||
|
.r = CompSwizzle::Red,
|
||||||
|
.g = CompSwizzle::Green,
|
||||||
|
.b = CompSwizzle::Blue,
|
||||||
|
.a = CompSwizzle::Alpha,
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr DataFormat RemapDataFormat(const DataFormat format) {
|
||||||
|
switch (format) {
|
||||||
|
case DataFormat::Format11_11_10:
|
||||||
|
return DataFormat::Format10_11_11;
|
||||||
|
case DataFormat::Format10_10_10_2:
|
||||||
|
return DataFormat::Format2_10_10_10;
|
||||||
|
case DataFormat::Format5_5_5_1:
|
||||||
|
return DataFormat::Format1_5_5_5;
|
||||||
default:
|
default:
|
||||||
return Float;
|
return format;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
|
constexpr NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
|
||||||
|
switch (format) {
|
||||||
|
case NumberFormat::Unorm: {
|
||||||
|
switch (data_format) {
|
||||||
|
case DataFormat::Format32:
|
||||||
|
case DataFormat::Format32_32:
|
||||||
|
case DataFormat::Format32_32_32:
|
||||||
|
case DataFormat::Format32_32_32_32:
|
||||||
|
return NumberFormat::Uint;
|
||||||
|
default:
|
||||||
|
return format;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case NumberFormat::Uscaled:
|
||||||
|
return NumberFormat::Uint;
|
||||||
|
case NumberFormat::Sscaled:
|
||||||
|
case NumberFormat::SnormNz:
|
||||||
|
return NumberFormat::Sint;
|
||||||
|
case NumberFormat::Ubnorm:
|
||||||
|
return NumberFormat::Unorm;
|
||||||
|
case NumberFormat::Float:
|
||||||
|
if (data_format == DataFormat::Format8) {
|
||||||
|
// Games may ask for 8-bit float when they want to access the stencil component
|
||||||
|
// of a depth-stencil image. Change to unsigned int to match the stencil format.
|
||||||
|
// This is also the closest approximation to pass the bits through unconverted.
|
||||||
|
return NumberFormat::Uint;
|
||||||
|
}
|
||||||
|
[[fallthrough]];
|
||||||
|
default:
|
||||||
|
return format;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
|
||||||
|
switch (format) {
|
||||||
|
case DataFormat::Format1_5_5_5:
|
||||||
|
case DataFormat::Format11_11_10: {
|
||||||
|
CompMapping result;
|
||||||
|
result.r = swizzle.b;
|
||||||
|
result.g = swizzle.g;
|
||||||
|
result.b = swizzle.r;
|
||||||
|
result.a = swizzle.a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
case DataFormat::Format10_10_10_2: {
|
||||||
|
CompMapping result;
|
||||||
|
result.r = swizzle.a;
|
||||||
|
result.g = swizzle.b;
|
||||||
|
result.b = swizzle.g;
|
||||||
|
result.a = swizzle.r;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
case DataFormat::Format4_4_4_4: {
|
||||||
|
// Remap to a more supported component order.
|
||||||
|
CompMapping result;
|
||||||
|
result.r = swizzle.g;
|
||||||
|
result.g = swizzle.b;
|
||||||
|
result.b = swizzle.a;
|
||||||
|
result.a = swizzle.r;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return swizzle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr NumberConversion MapNumberConversion(const NumberFormat num_fmt,
|
||||||
|
const DataFormat data_fmt) {
|
||||||
|
switch (num_fmt) {
|
||||||
|
case NumberFormat::Unorm: {
|
||||||
|
switch (data_fmt) {
|
||||||
|
case DataFormat::Format32:
|
||||||
|
case DataFormat::Format32_32:
|
||||||
|
case DataFormat::Format32_32_32:
|
||||||
|
case DataFormat::Format32_32_32_32:
|
||||||
|
return NumberConversion::Uint32ToUnorm;
|
||||||
|
default:
|
||||||
|
return NumberConversion::None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case NumberFormat::Uscaled:
|
||||||
|
return NumberConversion::UintToUscaled;
|
||||||
|
case NumberFormat::Sscaled:
|
||||||
|
return NumberConversion::SintToSscaled;
|
||||||
|
case NumberFormat::Ubnorm:
|
||||||
|
return NumberConversion::UnormToUbnorm;
|
||||||
|
case NumberFormat::SnormNz: {
|
||||||
|
switch (data_fmt) {
|
||||||
|
case DataFormat::Format8:
|
||||||
|
case DataFormat::Format8_8:
|
||||||
|
case DataFormat::Format8_8_8_8:
|
||||||
|
return NumberConversion::Sint8ToSnormNz;
|
||||||
|
case DataFormat::Format16:
|
||||||
|
case DataFormat::Format16_16:
|
||||||
|
case DataFormat::Format16_16_16_16:
|
||||||
|
return NumberConversion::Sint16ToSnormNz;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return NumberConversion::None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
|
||||||
|
switch (nfmt) {
|
||||||
|
case NumberFormat::Sint:
|
||||||
|
return NumberClass::Sint;
|
||||||
|
case NumberFormat::Uint:
|
||||||
|
return NumberClass::Uint;
|
||||||
|
default:
|
||||||
|
return NumberClass::Float;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool IsInteger(const NumberFormat nfmt) {
|
||||||
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
std::string_view NameOf(DataFormat fmt);
|
||||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
std::string_view NameOf(NumberFormat fmt);
|
||||||
|
|
||||||
int NumComponents(DataFormat format);
|
u32 NumComponents(DataFormat format);
|
||||||
int NumBits(DataFormat format);
|
u32 NumBitsPerBlock(DataFormat format);
|
||||||
u32 ComponentBits(DataFormat format, u32 comp);
|
|
||||||
s32 ComponentOffset(DataFormat format, u32 comp);
|
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
||||||
|
@ -554,6 +554,61 @@ struct PM4DmaData {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class CopyDataSrc : u32 {
|
||||||
|
MappedRegister = 0,
|
||||||
|
Memory = 1,
|
||||||
|
TCL2 = 2,
|
||||||
|
Gds = 3,
|
||||||
|
// Reserved = 4,
|
||||||
|
Immediate = 5,
|
||||||
|
Atomic = 6,
|
||||||
|
GdsAtomic0 = 7,
|
||||||
|
GdsAtomic1 = 8,
|
||||||
|
GpuClock = 9,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class CopyDataDst : u32 {
|
||||||
|
MappedRegister = 0,
|
||||||
|
MemorySync = 1,
|
||||||
|
TCL2 = 2,
|
||||||
|
Gds = 3,
|
||||||
|
// Reserved = 4,
|
||||||
|
MemoryAsync = 5,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class CopyDataEngine : u32 {
|
||||||
|
Me = 0,
|
||||||
|
Pfp = 1,
|
||||||
|
Ce = 2,
|
||||||
|
// Reserved = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PM4CmdCopyData {
|
||||||
|
PM4Type3Header header;
|
||||||
|
union {
|
||||||
|
BitField<0, 4, CopyDataSrc> src_sel;
|
||||||
|
BitField<8, 4, CopyDataDst> dst_sel;
|
||||||
|
BitField<16, 1, u32> count_sel;
|
||||||
|
BitField<20, 1, u32> wr_confirm;
|
||||||
|
BitField<30, 2, CopyDataEngine> engine_sel;
|
||||||
|
u32 control;
|
||||||
|
};
|
||||||
|
u32 src_addr_lo;
|
||||||
|
u32 src_addr_hi;
|
||||||
|
u32 dst_addr_lo;
|
||||||
|
u32 dst_addr_hi;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T SrcAddress() const {
|
||||||
|
return std::bit_cast<T>(src_addr_lo | u64(src_addr_hi) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T DstAddress() const {
|
||||||
|
return std::bit_cast<T>(dst_addr_lo | u64(dst_addr_hi) << 32);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct PM4CmdRewind {
|
struct PM4CmdRewind {
|
||||||
PM4Type3Header header;
|
PM4Type3Header header;
|
||||||
union {
|
union {
|
||||||
@ -1104,4 +1159,25 @@ struct PM4CmdMemSemaphore {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PM4CmdCondExec {
|
||||||
|
PM4Type3Header header;
|
||||||
|
union {
|
||||||
|
BitField<2, 30, u32> bool_addr_lo; ///< low 32 address bits for the block in memory from
|
||||||
|
///< where the CP will fetch the condition
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
BitField<0, 16, u32> bool_addr_hi; ///< high address bits for the condition
|
||||||
|
BitField<28, 4, u32> command;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
BitField<0, 14, u32> exec_count; ///< Number of DWords that the CP will skip
|
||||||
|
///< if bool pointed to is zero
|
||||||
|
};
|
||||||
|
|
||||||
|
bool* Address() const {
|
||||||
|
return std::bit_cast<bool*>(u64(bool_addr_hi.Value()) << 32 | u64(bool_addr_lo.Value())
|
||||||
|
<< 2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/types.h"
|
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
|
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include "common/assert.h"
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
@ -114,281 +113,6 @@ enum class GsOutputPrimitiveType : u32 {
|
|||||||
TriangleStrip = 2,
|
TriangleStrip = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
|
||||||
enum class DataFormat : u32 {
|
|
||||||
FormatInvalid = 0,
|
|
||||||
Format8 = 1,
|
|
||||||
Format16 = 2,
|
|
||||||
Format8_8 = 3,
|
|
||||||
Format32 = 4,
|
|
||||||
Format16_16 = 5,
|
|
||||||
Format10_11_11 = 6,
|
|
||||||
Format11_11_10 = 7,
|
|
||||||
Format10_10_10_2 = 8,
|
|
||||||
Format2_10_10_10 = 9,
|
|
||||||
Format8_8_8_8 = 10,
|
|
||||||
Format32_32 = 11,
|
|
||||||
Format16_16_16_16 = 12,
|
|
||||||
Format32_32_32 = 13,
|
|
||||||
Format32_32_32_32 = 14,
|
|
||||||
Format5_6_5 = 16,
|
|
||||||
Format1_5_5_5 = 17,
|
|
||||||
Format5_5_5_1 = 18,
|
|
||||||
Format4_4_4_4 = 19,
|
|
||||||
Format8_24 = 20,
|
|
||||||
Format24_8 = 21,
|
|
||||||
FormatX24_8_32 = 22,
|
|
||||||
FormatGB_GR = 32,
|
|
||||||
FormatBG_RG = 33,
|
|
||||||
Format5_9_9_9 = 34,
|
|
||||||
FormatBc1 = 35,
|
|
||||||
FormatBc2 = 36,
|
|
||||||
FormatBc3 = 37,
|
|
||||||
FormatBc4 = 38,
|
|
||||||
FormatBc5 = 39,
|
|
||||||
FormatBc6 = 40,
|
|
||||||
FormatBc7 = 41,
|
|
||||||
FormatFmask8_1 = 47,
|
|
||||||
FormatFmask8_2 = 48,
|
|
||||||
FormatFmask8_4 = 49,
|
|
||||||
FormatFmask16_1 = 50,
|
|
||||||
FormatFmask16_2 = 51,
|
|
||||||
FormatFmask32_2 = 52,
|
|
||||||
FormatFmask32_4 = 53,
|
|
||||||
FormatFmask32_8 = 54,
|
|
||||||
FormatFmask64_4 = 55,
|
|
||||||
FormatFmask64_8 = 56,
|
|
||||||
Format4_4 = 57,
|
|
||||||
Format6_5_5 = 58,
|
|
||||||
Format1 = 59,
|
|
||||||
Format1_Reversed = 60,
|
|
||||||
Format32_As_8 = 61,
|
|
||||||
Format32_As_8_8 = 62,
|
|
||||||
Format32_As_32_32_32_32 = 63,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class NumberFormat : u32 {
|
|
||||||
Unorm = 0,
|
|
||||||
Snorm = 1,
|
|
||||||
Uscaled = 2,
|
|
||||||
Sscaled = 3,
|
|
||||||
Uint = 4,
|
|
||||||
Sint = 5,
|
|
||||||
SnormNz = 6,
|
|
||||||
Float = 7,
|
|
||||||
Srgb = 9,
|
|
||||||
Ubnorm = 10,
|
|
||||||
UbnormNz = 11,
|
|
||||||
Ubint = 12,
|
|
||||||
Ubscaled = 13,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class CompSwizzle : u8 {
|
|
||||||
Zero = 0,
|
|
||||||
One = 1,
|
|
||||||
Red = 4,
|
|
||||||
Green = 5,
|
|
||||||
Blue = 6,
|
|
||||||
Alpha = 7,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class NumberConversion : u32 {
|
|
||||||
None = 0,
|
|
||||||
UintToUscaled = 1,
|
|
||||||
SintToSscaled = 2,
|
|
||||||
UnormToUbnorm = 3,
|
|
||||||
Sint8ToSnormNz = 4,
|
|
||||||
Sint16ToSnormNz = 5,
|
|
||||||
Uint32ToUnorm = 6,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CompMapping {
|
|
||||||
CompSwizzle r;
|
|
||||||
CompSwizzle g;
|
|
||||||
CompSwizzle b;
|
|
||||||
CompSwizzle a;
|
|
||||||
|
|
||||||
auto operator<=>(const CompMapping& other) const = default;
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
|
||||||
return {
|
|
||||||
ApplySingle(data, r),
|
|
||||||
ApplySingle(data, g),
|
|
||||||
ApplySingle(data, b),
|
|
||||||
ApplySingle(data, a),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] CompMapping Inverse() const {
|
|
||||||
CompMapping result{};
|
|
||||||
InverseSingle(result.r, CompSwizzle::Red);
|
|
||||||
InverseSingle(result.g, CompSwizzle::Green);
|
|
||||||
InverseSingle(result.b, CompSwizzle::Blue);
|
|
||||||
InverseSingle(result.a, CompSwizzle::Alpha);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename T>
|
|
||||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
|
||||||
switch (swizzle) {
|
|
||||||
case CompSwizzle::Zero:
|
|
||||||
return T(0);
|
|
||||||
case CompSwizzle::One:
|
|
||||||
return T(1);
|
|
||||||
case CompSwizzle::Red:
|
|
||||||
return data[0];
|
|
||||||
case CompSwizzle::Green:
|
|
||||||
return data[1];
|
|
||||||
case CompSwizzle::Blue:
|
|
||||||
return data[2];
|
|
||||||
case CompSwizzle::Alpha:
|
|
||||||
return data[3];
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
|
|
||||||
if (r == target) {
|
|
||||||
dst = CompSwizzle::Red;
|
|
||||||
} else if (g == target) {
|
|
||||||
dst = CompSwizzle::Green;
|
|
||||||
} else if (b == target) {
|
|
||||||
dst = CompSwizzle::Blue;
|
|
||||||
} else if (a == target) {
|
|
||||||
dst = CompSwizzle::Alpha;
|
|
||||||
} else {
|
|
||||||
dst = CompSwizzle::Zero;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static constexpr CompMapping IdentityMapping = {
|
|
||||||
.r = CompSwizzle::Red,
|
|
||||||
.g = CompSwizzle::Green,
|
|
||||||
.b = CompSwizzle::Blue,
|
|
||||||
.a = CompSwizzle::Alpha,
|
|
||||||
};
|
|
||||||
|
|
||||||
inline DataFormat RemapDataFormat(const DataFormat format) {
|
|
||||||
switch (format) {
|
|
||||||
case DataFormat::Format11_11_10:
|
|
||||||
return DataFormat::Format10_11_11;
|
|
||||||
case DataFormat::Format10_10_10_2:
|
|
||||||
return DataFormat::Format2_10_10_10;
|
|
||||||
case DataFormat::Format5_5_5_1:
|
|
||||||
return DataFormat::Format1_5_5_5;
|
|
||||||
default:
|
|
||||||
return format;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
|
|
||||||
switch (format) {
|
|
||||||
case NumberFormat::Unorm: {
|
|
||||||
switch (data_format) {
|
|
||||||
case DataFormat::Format32:
|
|
||||||
case DataFormat::Format32_32:
|
|
||||||
case DataFormat::Format32_32_32:
|
|
||||||
case DataFormat::Format32_32_32_32:
|
|
||||||
return NumberFormat::Uint;
|
|
||||||
default:
|
|
||||||
return format;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case NumberFormat::Uscaled:
|
|
||||||
return NumberFormat::Uint;
|
|
||||||
case NumberFormat::Sscaled:
|
|
||||||
case NumberFormat::SnormNz:
|
|
||||||
return NumberFormat::Sint;
|
|
||||||
case NumberFormat::Ubnorm:
|
|
||||||
return NumberFormat::Unorm;
|
|
||||||
case NumberFormat::Float:
|
|
||||||
if (data_format == DataFormat::Format8) {
|
|
||||||
// Games may ask for 8-bit float when they want to access the stencil component
|
|
||||||
// of a depth-stencil image. Change to unsigned int to match the stencil format.
|
|
||||||
// This is also the closest approximation to pass the bits through unconverted.
|
|
||||||
return NumberFormat::Uint;
|
|
||||||
}
|
|
||||||
[[fallthrough]];
|
|
||||||
default:
|
|
||||||
return format;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
|
|
||||||
switch (format) {
|
|
||||||
case DataFormat::Format1_5_5_5:
|
|
||||||
case DataFormat::Format11_11_10: {
|
|
||||||
CompMapping result;
|
|
||||||
result.r = swizzle.b;
|
|
||||||
result.g = swizzle.g;
|
|
||||||
result.b = swizzle.r;
|
|
||||||
result.a = swizzle.a;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
case DataFormat::Format10_10_10_2: {
|
|
||||||
CompMapping result;
|
|
||||||
result.r = swizzle.a;
|
|
||||||
result.g = swizzle.b;
|
|
||||||
result.b = swizzle.g;
|
|
||||||
result.a = swizzle.r;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
case DataFormat::Format4_4_4_4: {
|
|
||||||
// Remap to a more supported component order.
|
|
||||||
CompMapping result;
|
|
||||||
result.r = swizzle.g;
|
|
||||||
result.g = swizzle.b;
|
|
||||||
result.b = swizzle.a;
|
|
||||||
result.a = swizzle.r;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return swizzle;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) {
|
|
||||||
switch (num_fmt) {
|
|
||||||
case NumberFormat::Unorm: {
|
|
||||||
switch (data_fmt) {
|
|
||||||
case DataFormat::Format32:
|
|
||||||
case DataFormat::Format32_32:
|
|
||||||
case DataFormat::Format32_32_32:
|
|
||||||
case DataFormat::Format32_32_32_32:
|
|
||||||
return NumberConversion::Uint32ToUnorm;
|
|
||||||
default:
|
|
||||||
return NumberConversion::None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case NumberFormat::Uscaled:
|
|
||||||
return NumberConversion::UintToUscaled;
|
|
||||||
case NumberFormat::Sscaled:
|
|
||||||
return NumberConversion::SintToSscaled;
|
|
||||||
case NumberFormat::Ubnorm:
|
|
||||||
return NumberConversion::UnormToUbnorm;
|
|
||||||
case NumberFormat::SnormNz: {
|
|
||||||
switch (data_fmt) {
|
|
||||||
case DataFormat::Format8:
|
|
||||||
case DataFormat::Format8_8:
|
|
||||||
case DataFormat::Format8_8_8_8:
|
|
||||||
return NumberConversion::Sint8ToSnormNz;
|
|
||||||
case DataFormat::Format16:
|
|
||||||
case DataFormat::Format16_16:
|
|
||||||
case DataFormat::Format16_16_16_16:
|
|
||||||
return NumberConversion::Sint16ToSnormNz;
|
|
||||||
default:
|
|
||||||
UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return NumberConversion::None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "core/memory.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/host_shaders/fault_buffer_process_comp.h"
|
#include "video_core/host_shaders/fault_buffer_process_comp.h"
|
||||||
@ -28,7 +29,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||||||
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
|
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
|
||||||
TextureCache& texture_cache_, PageManager& tracker_)
|
TextureCache& texture_cache_, PageManager& tracker_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
|
: instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
|
||||||
texture_cache{texture_cache_}, tracker{tracker_},
|
memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
|
||||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||||
download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
|
download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
|
||||||
@ -293,7 +294,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
|
|||||||
|
|
||||||
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||||
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
||||||
if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
|
if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
|
||||||
memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -365,7 +366,9 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
|
|||||||
return ObtainBuffer(gpu_addr, size, false, false);
|
return ObtainBuffer(gpu_addr, size, false, false);
|
||||||
}
|
}
|
||||||
// In all other cases, just do a CPU copy to the staging buffer.
|
// In all other cases, just do a CPU copy to the staging buffer.
|
||||||
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
|
const auto [data, offset] = staging_buffer.Map(size, 16);
|
||||||
|
memory->CopySparseMemory(gpu_addr, data, size);
|
||||||
|
staging_buffer.Commit();
|
||||||
return {&staging_buffer, offset};
|
return {&staging_buffer, offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -798,24 +801,45 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||||
static constexpr FindFlags find_flags =
|
boost::container::small_vector<ImageId, 6> image_ids;
|
||||||
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
|
texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
|
||||||
TextureCache::BaseDesc desc{};
|
if (image.info.guest_address != device_addr) {
|
||||||
desc.info.guest_address = device_addr;
|
return;
|
||||||
desc.info.guest_size = size;
|
}
|
||||||
const ImageId image_id = texture_cache.FindImage(desc, find_flags);
|
// Only perform sync if image is:
|
||||||
if (!image_id) {
|
// - GPU modified; otherwise there are no changes to synchronize.
|
||||||
|
// - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
|
||||||
|
// - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
|
||||||
|
if (False(image.flags & ImageFlagBits::GpuModified) ||
|
||||||
|
True(image.flags & ImageFlagBits::Dirty)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image_ids.push_back(image_id);
|
||||||
|
});
|
||||||
|
if (image_ids.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
ImageId image_id{};
|
||||||
|
if (image_ids.size() == 1) {
|
||||||
|
// Sometimes image size might not exactly match with requested buffer size
|
||||||
|
// If we only found 1 candidate image use it without too many questions.
|
||||||
|
image_id = image_ids[0];
|
||||||
|
} else {
|
||||||
|
for (s32 i = 0; i < image_ids.size(); ++i) {
|
||||||
|
Image& image = texture_cache.GetImage(image_ids[i]);
|
||||||
|
if (image.info.guest_size == size) {
|
||||||
|
image_id = image_ids[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!image_id) {
|
||||||
|
LOG_WARNING(Render_Vulkan,
|
||||||
|
"Failed to find exact image match for copy addr={:#x}, size={:#x}",
|
||||||
|
device_addr, size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
Image& image = texture_cache.GetImage(image_id);
|
Image& image = texture_cache.GetImage(image_id);
|
||||||
// Only perform sync if image is:
|
|
||||||
// - GPU modified; otherwise there are no changes to synchronize.
|
|
||||||
// - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
|
|
||||||
// - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
|
|
||||||
if (False(image.flags & ImageFlagBits::GpuModified) ||
|
|
||||||
True(image.flags & ImageFlagBits::Dirty)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ASSERT_MSG(device_addr == image.info.guest_address,
|
ASSERT_MSG(device_addr == image.info.guest_address,
|
||||||
"Texel buffer aliases image subresources {:x} : {:x}", device_addr,
|
"Texel buffer aliases image subresources {:x} : {:x}", device_addr,
|
||||||
image.info.guest_address);
|
image.info.guest_address);
|
||||||
|
@ -17,6 +17,10 @@ namespace AmdGpu {
|
|||||||
struct Liverpool;
|
struct Liverpool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
class MemoryManager;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
namespace Gcn {
|
namespace Gcn {
|
||||||
struct FetchShaderData;
|
struct FetchShaderData;
|
||||||
@ -183,6 +187,7 @@ private:
|
|||||||
Vulkan::Scheduler& scheduler;
|
Vulkan::Scheduler& scheduler;
|
||||||
Vulkan::Rasterizer& rasterizer;
|
Vulkan::Rasterizer& rasterizer;
|
||||||
AmdGpu::Liverpool* liverpool;
|
AmdGpu::Liverpool* liverpool;
|
||||||
|
Core::MemoryManager* memory;
|
||||||
TextureCache& texture_cache;
|
TextureCache& texture_cache;
|
||||||
PageManager& tracker;
|
PageManager& tracker;
|
||||||
StreamBuffer staging_buffer;
|
StreamBuffer staging_buffer;
|
||||||
|
@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
|
|||||||
uint num_levels;
|
uint num_levels;
|
||||||
uint pitch;
|
uint pitch;
|
||||||
uint height;
|
uint height;
|
||||||
uint sizes[14];
|
uint sizes[16];
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
// Inverse morton LUT, small enough to fit into K$
|
// Inverse morton LUT, small enough to fit into K$
|
||||||
|
@ -18,7 +18,7 @@ layout(push_constant) uniform image_info {
|
|||||||
uint num_levels;
|
uint num_levels;
|
||||||
uint pitch;
|
uint pitch;
|
||||||
uint height;
|
uint height;
|
||||||
uint sizes[14];
|
uint sizes[16];
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
#define MICRO_TILE_DIM 8
|
#define MICRO_TILE_DIM 8
|
||||||
|
@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
|
|||||||
uint num_levels;
|
uint num_levels;
|
||||||
uint pitch;
|
uint pitch;
|
||||||
uint height;
|
uint height;
|
||||||
uint sizes[14];
|
uint sizes[16];
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
// Inverse morton LUT, small enough to fit into K$
|
// Inverse morton LUT, small enough to fit into K$
|
||||||
|
@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
|
|||||||
uint num_levels;
|
uint num_levels;
|
||||||
uint pitch;
|
uint pitch;
|
||||||
uint height;
|
uint height;
|
||||||
uint sizes[14];
|
uint sizes[16];
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
// Inverse morton LUT, small enough to fit into K$
|
// Inverse morton LUT, small enough to fit into K$
|
||||||
|
@ -19,7 +19,7 @@ layout(push_constant) uniform image_info {
|
|||||||
uint num_levels;
|
uint num_levels;
|
||||||
uint pitch;
|
uint pitch;
|
||||||
uint height;
|
uint height;
|
||||||
uint sizes[14];
|
uint sizes[16];
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
#define MICRO_TILE_DIM 8
|
#define MICRO_TILE_DIM 8
|
||||||
|
@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
|
||||||
|
using BlendFactor = Liverpool::BlendControl::BlendFactor;
|
||||||
|
switch (factor) {
|
||||||
|
case BlendFactor::Src1Color:
|
||||||
|
case BlendFactor::Src1Alpha:
|
||||||
|
case BlendFactor::InvSrc1Color:
|
||||||
|
case BlendFactor::InvSrc1Alpha:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
|
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
|
||||||
using BlendFunc = Liverpool::BlendControl::BlendFunc;
|
using BlendFunc = Liverpool::BlendControl::BlendFunc;
|
||||||
switch (func) {
|
switch (func) {
|
||||||
|
@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);
|
|||||||
|
|
||||||
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
||||||
|
|
||||||
|
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
||||||
|
|
||||||
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
||||||
|
|
||||||
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
||||||
|
@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
|
|||||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
|
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
|
||||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
|
||||||
|
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
||||||
features = feature_chain.get().features;
|
features = feature_chain.get().features;
|
||||||
|
|
||||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||||
@ -283,6 +284,20 @@ bool Instance::CreateDevice() {
|
|||||||
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
|
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
|
||||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
|
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
|
||||||
}
|
}
|
||||||
|
workgroup_memory_explicit_layout =
|
||||||
|
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
||||||
|
if (workgroup_memory_explicit_layout) {
|
||||||
|
workgroup_memory_explicit_layout_features =
|
||||||
|
feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
||||||
|
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
|
||||||
|
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
|
||||||
|
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
|
||||||
|
workgroup_memory_explicit_layout_features
|
||||||
|
.workgroupMemoryExplicitLayoutScalarBlockLayout);
|
||||||
|
LOG_INFO(
|
||||||
|
Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
|
||||||
|
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
|
||||||
|
}
|
||||||
const bool calibrated_timestamps =
|
const bool calibrated_timestamps =
|
||||||
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
|
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
|
||||||
|
|
||||||
@ -420,6 +435,15 @@ bool Instance::CreateDevice() {
|
|||||||
.shaderImageFloat32AtomicMinMax =
|
.shaderImageFloat32AtomicMinMax =
|
||||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
|
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
|
||||||
|
.workgroupMemoryExplicitLayout =
|
||||||
|
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
|
||||||
|
.workgroupMemoryExplicitLayoutScalarBlockLayout =
|
||||||
|
workgroup_memory_explicit_layout_features
|
||||||
|
.workgroupMemoryExplicitLayoutScalarBlockLayout,
|
||||||
|
.workgroupMemoryExplicitLayout16BitAccess =
|
||||||
|
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
|
||||||
|
},
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
portability_features,
|
portability_features,
|
||||||
#endif
|
#endif
|
||||||
@ -452,6 +476,9 @@ bool Instance::CreateDevice() {
|
|||||||
if (!shader_atomic_float2) {
|
if (!shader_atomic_float2) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
||||||
}
|
}
|
||||||
|
if (!workgroup_memory_explicit_layout) {
|
||||||
|
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
||||||
|
}
|
||||||
|
|
||||||
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
||||||
if (device_result != vk::Result::eSuccess) {
|
if (device_result != vk::Result::eSuccess) {
|
||||||
|
@ -171,6 +171,12 @@ public:
|
|||||||
return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
|
return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
|
||||||
|
bool IsWorkgroupMemoryExplicitLayoutSupported() const {
|
||||||
|
return workgroup_memory_explicit_layout &&
|
||||||
|
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when geometry shaders are supported by the device
|
/// Returns true when geometry shaders are supported by the device
|
||||||
bool IsGeometryStageSupported() const {
|
bool IsGeometryStageSupported() const {
|
||||||
return features.geometryShader;
|
return features.geometryShader;
|
||||||
@ -349,6 +355,8 @@ private:
|
|||||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
|
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
|
||||||
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
|
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
|
||||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
|
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
|
||||||
|
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
|
||||||
|
workgroup_memory_explicit_layout_features;
|
||||||
vk::DriverIdKHR driver_id;
|
vk::DriverIdKHR driver_id;
|
||||||
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
|
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
|
||||||
std::string vendor_name;
|
std::string vendor_name;
|
||||||
@ -374,6 +382,7 @@ private:
|
|||||||
bool amd_gcn_shader{};
|
bool amd_gcn_shader{};
|
||||||
bool amd_shader_trinary_minmax{};
|
bool amd_shader_trinary_minmax{};
|
||||||
bool shader_atomic_float2{};
|
bool shader_atomic_float2{};
|
||||||
|
bool workgroup_memory_explicit_layout{};
|
||||||
bool portability_subset{};
|
bool portability_subset{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -146,6 +146,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
}
|
}
|
||||||
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||||
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
|
gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
|
||||||
|
gs_info.mode = regs.vgt_gs_mode.mode;
|
||||||
const auto params_vc = Liverpool::GetParams(regs.vs_program);
|
const auto params_vc = Liverpool::GetParams(regs.vs_program);
|
||||||
gs_info.vs_copy = params_vc.code;
|
gs_info.vs_copy = params_vc.code;
|
||||||
gs_info.vs_copy_hash = params_vc.hash;
|
gs_info.vs_copy_hash = params_vc.hash;
|
||||||
@ -158,6 +159,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
info.fs_info.addr_flags = regs.ps_input_addr;
|
info.fs_info.addr_flags = regs.ps_input_addr;
|
||||||
const auto& ps_inputs = regs.ps_inputs;
|
const auto& ps_inputs = regs.ps_inputs;
|
||||||
info.fs_info.num_inputs = regs.num_interp;
|
info.fs_info.num_inputs = regs.num_interp;
|
||||||
|
const auto& cb0_blend = regs.blend_control[0];
|
||||||
|
info.fs_info.dual_source_blending =
|
||||||
|
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
|
||||||
|
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
|
||||||
|
if (cb0_blend.separate_alpha_blend) {
|
||||||
|
info.fs_info.dual_source_blending |=
|
||||||
|
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
|
||||||
|
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
|
||||||
|
}
|
||||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||||
info.fs_info.inputs[i] = {
|
info.fs_info.inputs[i] = {
|
||||||
.param_index = u8(ps_inputs[i].input_offset.Value()),
|
.param_index = u8(ps_inputs[i].input_offset.Value()),
|
||||||
@ -200,7 +210,6 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
||||||
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||||
.support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
|
.support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
|
||||||
.support_explicit_workgroup_layout = true,
|
|
||||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||||
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
||||||
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
|
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
|
||||||
@ -208,6 +217,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
|
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
|
||||||
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
|
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
|
||||||
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
|
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
|
||||||
|
.supports_workgroup_explicit_memory_layout =
|
||||||
|
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
|
||||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||||
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
||||||
|
220
src/video_core/texture_cache/host_compatibility.cpp
Normal file
220
src/video_core/texture_cache/host_compatibility.cpp
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
// Copyright © 2015-2023 The Khronos Group Inc.
|
||||||
|
// Copyright © 2015-2023 Valve Corporation
|
||||||
|
// Copyright © 2015-2023 LunarG, Inc.
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include "common/enum.h"
|
||||||
|
#include "video_core/texture_cache/host_compatibility.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/**
 * @brief Bit flags naming the format compatibility classes of the Vulkan specification
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.h#L47-L131
 * @note Every enumerator other than NONE occupies its own bit, so several classes can be
 * combined into a single value with the bitwise operators.
 */
enum class CompatibilityClass {
    NONE = 0x0000000,

    // Size based classes of uncompressed colour formats
    _128BIT = 0x0000001,
    _16BIT = 0x0000002,
    _192BIT = 0x0000004,
    _24BIT = 0x0000008,
    _256BIT = 0x0000010,
    _32BIT = 0x0000020,
    _48BIT = 0x0000040,
    _64BIT = 0x0000080,
    _8BIT = 0x0000100,
    _96BIT = 0x0000200,

    // Block compressed (BC) classes
    BC1_RGB = 0x0000400,
    BC1_RGBA = 0x0000800,
    BC2 = 0x0001000,
    BC3 = 0x0002000,
    BC4 = 0x0004000,
    BC5 = 0x0008000,
    BC6H = 0x0010000,
    BC7 = 0x0020000,

    // Depth and/or stencil classes
    D16 = 0x0040000,
    D16S8 = 0x0080000,
    D24 = 0x0100000,
    D24S8 = 0x0200000,
    D32 = 0x0400000,
    D32S8 = 0x0800000,
    S8 = 0x1000000,
};
|
||||||
|
DECLARE_ENUM_FLAG_OPERATORS(CompatibilityClass)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The format compatibility class according to the Vulkan specification
|
||||||
|
* @url
|
||||||
|
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
|
||||||
|
* @url
|
||||||
|
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.cpp#L70-L812
|
||||||
|
*/
|
||||||
|
static const std::unordered_map<vk::Format, CompatibilityClass> FORMAT_TABLE = {
|
||||||
|
{vk::Format::eA1R5G5B5UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eA2B10G10R10SintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2B10G10R10SnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2B10G10R10SscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2B10G10R10UintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2B10G10R10UnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2B10G10R10UscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10SintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10SnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10SscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10UintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10UnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA2R10G10B10UscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA4B4G4R4UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eA4R4G4B4UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eA8B8G8R8SintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8SnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8SrgbPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8SscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8UintPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8UnormPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eA8B8G8R8UscaledPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB10G11R11UfloatPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB4G4R4A4UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eB5G5R5A1UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eB5G6R5UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Sint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Snorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Srgb, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Sscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Uint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Unorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8A8Uscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eB8G8R8Sint, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Snorm, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Srgb, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Sscaled, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Uint, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Unorm, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eB8G8R8Uscaled, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eBc1RgbaSrgbBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc1RgbaUnormBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc1RgbSrgbBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc1RgbUnormBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc2SrgbBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc2UnormBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc3SrgbBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc3UnormBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc4SnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc4UnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eBc5SnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc5UnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc6HSfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc6HUfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc7SrgbBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eBc7UnormBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eD16Unorm, CompatibilityClass::D16},
|
||||||
|
{vk::Format::eD16UnormS8Uint, CompatibilityClass::D16S8},
|
||||||
|
{vk::Format::eD24UnormS8Uint, CompatibilityClass::D24S8},
|
||||||
|
{vk::Format::eD32Sfloat, CompatibilityClass::D32},
|
||||||
|
{vk::Format::eD32SfloatS8Uint, CompatibilityClass::D32S8},
|
||||||
|
{vk::Format::eE5B9G9R9UfloatPack32, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR10X6G10X6Unorm2Pack16, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR10X6UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR12X4G12X4Unorm2Pack16, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR12X4UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Sfloat, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Sint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Snorm, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Sscaled, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Uint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Unorm, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16A16Uscaled, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR16G16B16Sfloat, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Sint, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Snorm, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Sscaled, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Uint, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Unorm, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16B16Uscaled, CompatibilityClass::_48BIT},
|
||||||
|
{vk::Format::eR16G16Sfloat, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Sint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Snorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Sscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Uint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Unorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16G16Uscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR16Sfloat, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Sint, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Snorm, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Sscaled, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Uint, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Unorm, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR16Uscaled, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR32G32B32A32Sfloat, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR32G32B32A32Sint, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR32G32B32A32Uint, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR32G32B32Sfloat, CompatibilityClass::_96BIT},
|
||||||
|
{vk::Format::eR32G32B32Sint, CompatibilityClass::_96BIT},
|
||||||
|
{vk::Format::eR32G32B32Uint, CompatibilityClass::_96BIT},
|
||||||
|
{vk::Format::eR32G32Sfloat, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR32G32Sint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR32G32Uint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR32Sfloat, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR32Sint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR32Uint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR4G4B4A4UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR4G4UnormPack8, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR5G5B5A1UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR5G6B5UnormPack16, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR64G64B64A64Sfloat, CompatibilityClass::_256BIT},
|
||||||
|
{vk::Format::eR64G64B64A64Sint, CompatibilityClass::_256BIT},
|
||||||
|
{vk::Format::eR64G64B64A64Uint, CompatibilityClass::_256BIT},
|
||||||
|
{vk::Format::eR64G64B64Sfloat, CompatibilityClass::_192BIT},
|
||||||
|
{vk::Format::eR64G64B64Sint, CompatibilityClass::_192BIT},
|
||||||
|
{vk::Format::eR64G64B64Uint, CompatibilityClass::_192BIT},
|
||||||
|
{vk::Format::eR64G64Sfloat, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR64G64Sint, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR64G64Uint, CompatibilityClass::_128BIT},
|
||||||
|
{vk::Format::eR64Sfloat, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR64Sint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR64Uint, CompatibilityClass::_64BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Sint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Snorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Srgb, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Sscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Uint, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Unorm, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8A8Uscaled, CompatibilityClass::_32BIT},
|
||||||
|
{vk::Format::eR8G8B8Sint, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Snorm, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Srgb, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Sscaled, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Uint, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Unorm, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8B8Uscaled, CompatibilityClass::_24BIT},
|
||||||
|
{vk::Format::eR8G8Sint, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Snorm, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Srgb, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Sscaled, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Uint, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Unorm, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8G8Uscaled, CompatibilityClass::_16BIT},
|
||||||
|
{vk::Format::eR8Sint, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Snorm, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Srgb, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Sscaled, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Uint, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Unorm, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eR8Uscaled, CompatibilityClass::_8BIT},
|
||||||
|
{vk::Format::eS8Uint, CompatibilityClass::S8},
|
||||||
|
{vk::Format::eX8D24UnormPack32, CompatibilityClass::D24},
|
||||||
|
{vk::Format::eUndefined, CompatibilityClass::NONE},
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * @brief Checks whether an image of format `base` can be viewed with format `view`
 * @param base Format the image was created with
 * @param view Format requested for the view
 * @return true when the formats are identical, or when every compatibility class flag of
 * `view` is also set for `base`; false otherwise, including when either format has no entry
 * in FORMAT_TABLE
 */
bool IsVulkanFormatCompatible(vk::Format base, vk::Format view) {
    // Identical formats need no table lookup, so this also works for unlisted formats
    if (base == view) {
        return true;
    }

    const auto base_entry = FORMAT_TABLE.find(base);
    const auto view_entry = FORMAT_TABLE.find(view);
    if (base_entry == FORMAT_TABLE.end() || view_entry == FORMAT_TABLE.end()) {
        // A format without a table entry has no known class: treat it as incompatible
        // instead of letting unordered_map::at() throw std::out_of_range.
        return false;
    }

    const CompatibilityClass view_comp = view_entry->second;
    // Compatible when the base carries every class bit that the view requires
    return (base_entry->second & view_comp) == view_comp;
}
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
@ -6,387 +6,11 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <unordered_map>
|
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
/**
 * @brief Enumerates every format compatibility class defined by the Vulkan specification
 * @url
 * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.h#L47-L131
 * @note Taken verbatim from the Vulkan Validation Layers, so the names do not follow the
 * Skyline naming conventions. Values are assigned implicitly; the order must not change.
 */
enum class FORMAT_COMPATIBILITY_CLASS {
    NONE = 0,
    // 10-bit multi-planar
    _10BIT_2PLANE_420, _10BIT_2PLANE_422, _10BIT_2PLANE_444,
    _10BIT_3PLANE_420, _10BIT_3PLANE_422, _10BIT_3PLANE_444,
    // 12-bit multi-planar
    _12BIT_2PLANE_420, _12BIT_2PLANE_422, _12BIT_2PLANE_444,
    _12BIT_3PLANE_420, _12BIT_3PLANE_422, _12BIT_3PLANE_444,
    _128BIT,
    // 16-bit, single and multi-planar
    _16BIT,
    _16BIT_2PLANE_420, _16BIT_2PLANE_422, _16BIT_2PLANE_444,
    _16BIT_3PLANE_420, _16BIT_3PLANE_422, _16BIT_3PLANE_444,
    _192BIT,
    _24BIT,
    _256BIT,
    // 32-bit
    _32BIT, _32BIT_B8G8R8G8, _32BIT_G8B8G8R8,
    _48BIT,
    // 64-bit
    _64BIT,
    _64BIT_B10G10R10G10, _64BIT_B12G12R12G12, _64BIT_B16G16R16G16,
    _64BIT_G10B10G10R10, _64BIT_G12B12G12R12, _64BIT_G16B16G16R16,
    _64BIT_R10G10B10A10, _64BIT_R12G12B12A12,
    // 8-bit, single and multi-planar
    _8BIT,
    _8BIT_2PLANE_420, _8BIT_2PLANE_422, _8BIT_2PLANE_444,
    _8BIT_3PLANE_420, _8BIT_3PLANE_422, _8BIT_3PLANE_444,
    _96BIT,
    // ASTC
    ASTC_10X10, ASTC_10X5, ASTC_10X6, ASTC_10X8,
    ASTC_12X10, ASTC_12X12,
    ASTC_4X4,
    ASTC_5X4, ASTC_5X5,
    ASTC_6X5, ASTC_6X6,
    ASTC_8X5, ASTC_8X6, ASTC_8X8,
    // BC
    BC1_RGB, BC1_RGBA, BC2, BC3, BC4, BC5, BC6H, BC7,
    // Depth / depth-stencil
    D16, D16S8, D24, D24S8, D32, D32S8,
    // EAC / ETC2
    EAC_R, EAC_RG,
    ETC2_EAC_RGBA, ETC2_RGB, ETC2_RGBA,
    // PVRTC
    PVRTC1_2BPP, PVRTC1_4BPP, PVRTC2_2BPP, PVRTC2_4BPP,
    // Stencil only
    S8
};
|
|
||||||
|
|
||||||
/**
|
/// Returns true if the two formats are compatible according to Vulkan's format compatibility rules
|
||||||
* @brief The format compatibility class according to the Vulkan specification
|
bool IsVulkanFormatCompatible(vk::Format base, vk::Format view);
|
||||||
* @url
|
|
||||||
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
|
|
||||||
* @url
|
|
||||||
* https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.cpp#L70-L812
|
|
||||||
* @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
|
|
||||||
* conventions
|
|
||||||
*/
|
|
||||||
static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatClassTable{
|
|
||||||
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2B10G10R10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A2R10G10B10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_SRGB_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_A8B8G8R8_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
|
|
||||||
{VK_FORMAT_ASTC_10x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
|
|
||||||
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
|
|
||||||
{VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
|
|
||||||
{VK_FORMAT_ASTC_10x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
|
|
||||||
{VK_FORMAT_ASTC_10x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
|
|
||||||
{VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
|
|
||||||
{VK_FORMAT_ASTC_10x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
|
|
||||||
{VK_FORMAT_ASTC_10x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
|
|
||||||
{VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
|
|
||||||
{VK_FORMAT_ASTC_10x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
|
|
||||||
{VK_FORMAT_ASTC_10x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
|
|
||||||
{VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
|
|
||||||
{VK_FORMAT_ASTC_12x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
|
|
||||||
{VK_FORMAT_ASTC_12x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
|
|
||||||
{VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
|
|
||||||
{VK_FORMAT_ASTC_12x12_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
|
|
||||||
{VK_FORMAT_ASTC_12x12_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
|
|
||||||
{VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
|
|
||||||
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
|
|
||||||
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
|
|
||||||
{VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
|
|
||||||
{VK_FORMAT_ASTC_5x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
|
|
||||||
{VK_FORMAT_ASTC_5x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
|
|
||||||
{VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
|
|
||||||
{VK_FORMAT_ASTC_5x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
|
|
||||||
{VK_FORMAT_ASTC_5x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
|
|
||||||
{VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
|
|
||||||
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
|
|
||||||
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
|
|
||||||
{VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
|
|
||||||
{VK_FORMAT_ASTC_6x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
|
|
||||||
{VK_FORMAT_ASTC_6x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
|
|
||||||
{VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
|
|
||||||
{VK_FORMAT_ASTC_8x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
|
|
||||||
{VK_FORMAT_ASTC_8x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
|
|
||||||
{VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
|
|
||||||
{VK_FORMAT_ASTC_8x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
|
|
||||||
{VK_FORMAT_ASTC_8x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
|
|
||||||
{VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
|
|
||||||
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
|
|
||||||
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
|
|
||||||
{VK_FORMAT_B10G11R11_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_64BIT_B10G10R10G10},
|
|
||||||
{VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_64BIT_B12G12R12G12},
|
|
||||||
{VK_FORMAT_B16G16R16G16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_B16G16R16G16},
|
|
||||||
{VK_FORMAT_B4G4R4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_B5G5R5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_B5G6R5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_B8G8R8G8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_B8G8R8G8},
|
|
||||||
{VK_FORMAT_B8G8R8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_B8G8R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
|
|
||||||
{VK_FORMAT_BC1_RGBA_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
|
|
||||||
{VK_FORMAT_BC1_RGB_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
|
|
||||||
{VK_FORMAT_BC1_RGB_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
|
|
||||||
{VK_FORMAT_BC2_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
|
|
||||||
{VK_FORMAT_BC2_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
|
|
||||||
{VK_FORMAT_BC3_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
|
|
||||||
{VK_FORMAT_BC3_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
|
|
||||||
{VK_FORMAT_BC4_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
|
|
||||||
{VK_FORMAT_BC4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
|
|
||||||
{VK_FORMAT_BC5_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
|
|
||||||
{VK_FORMAT_BC5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
|
|
||||||
{VK_FORMAT_BC6H_SFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
|
|
||||||
{VK_FORMAT_BC6H_UFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
|
|
||||||
{VK_FORMAT_BC7_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
|
|
||||||
{VK_FORMAT_BC7_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
|
|
||||||
{VK_FORMAT_D16_UNORM, FORMAT_COMPATIBILITY_CLASS::D16},
|
|
||||||
{VK_FORMAT_D16_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D16S8},
|
|
||||||
{VK_FORMAT_D24_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D24S8},
|
|
||||||
{VK_FORMAT_D32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::D32},
|
|
||||||
{VK_FORMAT_D32_SFLOAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D32S8},
|
|
||||||
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_EAC_R11G11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
|
|
||||||
{VK_FORMAT_EAC_R11G11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
|
|
||||||
{VK_FORMAT_EAC_R11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
|
|
||||||
{VK_FORMAT_EAC_R11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
|
|
||||||
{VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
|
|
||||||
{VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_64BIT_G10B10G10R10},
|
|
||||||
{VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_420},
|
|
||||||
{VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_422},
|
|
||||||
{VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_444},
|
|
||||||
{VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_420},
|
|
||||||
{VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_422},
|
|
||||||
{VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_444},
|
|
||||||
{VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_64BIT_G12B12G12R12},
|
|
||||||
{VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_420},
|
|
||||||
{VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_422},
|
|
||||||
{VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_444},
|
|
||||||
{VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_420},
|
|
||||||
{VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_422},
|
|
||||||
{VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
|
|
||||||
FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_444},
|
|
||||||
{VK_FORMAT_G16B16G16R16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_G16B16G16R16},
|
|
||||||
{VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_420},
|
|
||||||
{VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_422},
|
|
||||||
{VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_444},
|
|
||||||
{VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_420},
|
|
||||||
{VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_422},
|
|
||||||
{VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_444},
|
|
||||||
{VK_FORMAT_G8B8G8R8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_G8B8G8R8},
|
|
||||||
{VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_420},
|
|
||||||
{VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_422},
|
|
||||||
{VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_444},
|
|
||||||
{VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_420},
|
|
||||||
{VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_422},
|
|
||||||
{VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_444},
|
|
||||||
{VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
|
|
||||||
{VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
|
|
||||||
{VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
|
|
||||||
{VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
|
|
||||||
{VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
|
|
||||||
{VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
|
|
||||||
{VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
|
|
||||||
{VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
|
|
||||||
{VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R10G10B10A10},
|
|
||||||
{VK_FORMAT_R10X6G10X6_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R10X6_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R12G12B12A12},
|
|
||||||
{VK_FORMAT_R12X4G12X4_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R12X4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_SNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16A16_USCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_SINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_SNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_UINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_UNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16B16_USCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
|
|
||||||
{VK_FORMAT_R16G16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16G16_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R16_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R32G32B32A32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R32G32B32A32_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R32G32B32A32_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R32G32B32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
|
||||||
{VK_FORMAT_R32G32B32_SINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
|
||||||
{VK_FORMAT_R32G32B32_UINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
|
|
||||||
{VK_FORMAT_R32G32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R32G32_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R32G32_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R32_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R32_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R4G4_UNORM_PACK8, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R5G5B5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R5G6B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R64G64B64A64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
|
||||||
{VK_FORMAT_R64G64B64A64_SINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
|
||||||
{VK_FORMAT_R64G64B64A64_UINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
|
|
||||||
{VK_FORMAT_R64G64B64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
|
||||||
{VK_FORMAT_R64G64B64_SINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
|
||||||
{VK_FORMAT_R64G64B64_UINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
|
|
||||||
{VK_FORMAT_R64G64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R64G64_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R64G64_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
|
|
||||||
{VK_FORMAT_R64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R64_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R64_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8B8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
|
|
||||||
{VK_FORMAT_R8G8_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_SRGB, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8G8_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
|
|
||||||
{VK_FORMAT_R8_SINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_UINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
|
|
||||||
{VK_FORMAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::S8},
|
|
||||||
{VK_FORMAT_X8_D24_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::D24},
|
|
||||||
{VK_FORMAT_UNDEFINED, FORMAT_COMPATIBILITY_CLASS::NONE},
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return If the two formats are compatible according to Vulkan's format compatibility rules
|
|
||||||
* @url
|
|
||||||
* https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
|
|
||||||
*/
|
|
||||||
static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
|
|
||||||
if (lhs == rhs) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
|
|
||||||
}
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
@ -14,62 +14,6 @@ namespace VideoCore {
|
|||||||
|
|
||||||
using namespace Vulkan;
|
using namespace Vulkan;
|
||||||
|
|
||||||
bool ImageInfo::IsBlockCoded() const {
|
|
||||||
switch (pixel_format) {
|
|
||||||
case vk::Format::eBc1RgbaSrgbBlock:
|
|
||||||
case vk::Format::eBc1RgbaUnormBlock:
|
|
||||||
case vk::Format::eBc1RgbSrgbBlock:
|
|
||||||
case vk::Format::eBc1RgbUnormBlock:
|
|
||||||
case vk::Format::eBc2SrgbBlock:
|
|
||||||
case vk::Format::eBc2UnormBlock:
|
|
||||||
case vk::Format::eBc3SrgbBlock:
|
|
||||||
case vk::Format::eBc3UnormBlock:
|
|
||||||
case vk::Format::eBc4SnormBlock:
|
|
||||||
case vk::Format::eBc4UnormBlock:
|
|
||||||
case vk::Format::eBc5SnormBlock:
|
|
||||||
case vk::Format::eBc5UnormBlock:
|
|
||||||
case vk::Format::eBc6HSfloatBlock:
|
|
||||||
case vk::Format::eBc6HUfloatBlock:
|
|
||||||
case vk::Format::eBc7SrgbBlock:
|
|
||||||
case vk::Format::eBc7UnormBlock:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ImageInfo::IsPacked() const {
|
|
||||||
switch (pixel_format) {
|
|
||||||
case vk::Format::eB5G5R5A1UnormPack16:
|
|
||||||
[[fallthrough]];
|
|
||||||
case vk::Format::eB5G6R5UnormPack16:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ImageInfo::IsDepthStencil() const {
|
|
||||||
switch (pixel_format) {
|
|
||||||
case vk::Format::eD16Unorm:
|
|
||||||
case vk::Format::eD16UnormS8Uint:
|
|
||||||
case vk::Format::eD32Sfloat:
|
|
||||||
case vk::Format::eD32SfloatS8Uint:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ImageInfo::HasStencil() const {
|
|
||||||
if (pixel_format == vk::Format::eD32SfloatS8Uint ||
|
|
||||||
pixel_format == vk::Format::eD24UnormS8Uint ||
|
|
||||||
pixel_format == vk::Format::eD16UnormS8Uint) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
|
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
|
||||||
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
||||||
vk::ImageUsageFlagBits::eTransferDst |
|
vk::ImageUsageFlagBits::eTransferDst |
|
||||||
@ -161,6 +105,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||||||
if (info.props.is_volume) {
|
if (info.props.is_volume) {
|
||||||
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
||||||
}
|
}
|
||||||
|
// Not supported by MoltenVK.
|
||||||
|
if (info.props.is_block && instance->GetDriverID() != vk::DriverId::eMoltenvk) {
|
||||||
|
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
||||||
|
}
|
||||||
|
|
||||||
usage_flags = ImageUsageFlags(info);
|
usage_flags = ImageUsageFlags(info);
|
||||||
format_features = FormatFeatureFlags(usage_flags);
|
format_features = FormatFeatureFlags(usage_flags);
|
||||||
@ -372,9 +320,9 @@ void Image::CopyImage(const Image& image) {
|
|||||||
|
|
||||||
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
|
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
|
||||||
for (u32 m = 0; m < image.info.resources.levels; ++m) {
|
for (u32 m = 0; m < image.info.resources.levels; ++m) {
|
||||||
const auto mip_w = std::max(info.size.width >> m, 1u);
|
const auto mip_w = std::max(image.info.size.width >> m, 1u);
|
||||||
const auto mip_h = std::max(info.size.height >> m, 1u);
|
const auto mip_h = std::max(image.info.size.height >> m, 1u);
|
||||||
const auto mip_d = std::max(info.size.depth >> m, 1u);
|
const auto mip_d = std::max(image.info.size.depth >> m, 1u);
|
||||||
|
|
||||||
image_copy.emplace_back(vk::ImageCopy{
|
image_copy.emplace_back(vk::ImageCopy{
|
||||||
.srcSubresource{
|
.srcSubresource{
|
||||||
|
@ -81,7 +81,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
|||||||
tiling_mode = buffer.GetTilingMode();
|
tiling_mode = buffer.GetTilingMode();
|
||||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
|
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
|
||||||
num_samples = buffer.NumSamples();
|
num_samples = buffer.NumSamples();
|
||||||
num_bits = NumBits(buffer.GetDataFmt());
|
num_bits = NumBitsPerBlock(buffer.GetDataFmt());
|
||||||
type = vk::ImageType::e2D;
|
type = vk::ImageType::e2D;
|
||||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||||
@ -142,7 +142,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
|
|||||||
resources.levels = image.NumLevels();
|
resources.levels = image.NumLevels();
|
||||||
resources.layers = image.NumLayers();
|
resources.layers = image.NumLayers();
|
||||||
num_samples = image.NumSamples();
|
num_samples = image.NumSamples();
|
||||||
num_bits = NumBits(image.GetDataFmt());
|
num_bits = NumBitsPerBlock(image.GetDataFmt());
|
||||||
|
|
||||||
guest_address = image.Address();
|
guest_address = image.Address();
|
||||||
|
|
||||||
@ -152,6 +152,80 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
|
|||||||
UpdateSize();
|
UpdateSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsBlockCoded() const {
|
||||||
|
switch (pixel_format) {
|
||||||
|
case vk::Format::eBc1RgbaSrgbBlock:
|
||||||
|
case vk::Format::eBc1RgbaUnormBlock:
|
||||||
|
case vk::Format::eBc1RgbSrgbBlock:
|
||||||
|
case vk::Format::eBc1RgbUnormBlock:
|
||||||
|
case vk::Format::eBc2SrgbBlock:
|
||||||
|
case vk::Format::eBc2UnormBlock:
|
||||||
|
case vk::Format::eBc3SrgbBlock:
|
||||||
|
case vk::Format::eBc3UnormBlock:
|
||||||
|
case vk::Format::eBc4SnormBlock:
|
||||||
|
case vk::Format::eBc4UnormBlock:
|
||||||
|
case vk::Format::eBc5SnormBlock:
|
||||||
|
case vk::Format::eBc5UnormBlock:
|
||||||
|
case vk::Format::eBc6HSfloatBlock:
|
||||||
|
case vk::Format::eBc6HUfloatBlock:
|
||||||
|
case vk::Format::eBc7SrgbBlock:
|
||||||
|
case vk::Format::eBc7UnormBlock:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsPacked() const {
|
||||||
|
switch (pixel_format) {
|
||||||
|
case vk::Format::eB5G5R5A1UnormPack16:
|
||||||
|
[[fallthrough]];
|
||||||
|
case vk::Format::eB5G6R5UnormPack16:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsDepthStencil() const {
|
||||||
|
switch (pixel_format) {
|
||||||
|
case vk::Format::eD16Unorm:
|
||||||
|
case vk::Format::eD16UnormS8Uint:
|
||||||
|
case vk::Format::eD32Sfloat:
|
||||||
|
case vk::Format::eD32SfloatS8Uint:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::HasStencil() const {
|
||||||
|
if (pixel_format == vk::Format::eD32SfloatS8Uint ||
|
||||||
|
pixel_format == vk::Format::eD24UnormS8Uint ||
|
||||||
|
pixel_format == vk::Format::eD16UnormS8Uint) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsCompatible(const ImageInfo& info) const {
|
||||||
|
return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
|
||||||
|
num_bits == info.num_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether two tiling values may be treated as equivalent.
/// Equal values always match; additionally 0x0d and 0x0e are accepted as a
/// matching pair in either order.
/// NOTE(review): the meaning of 0x0d / 0x0e is not visible here — presumably
/// tiling-mode indices; confirm against the tiling mode definitions.
bool ImageInfo::IsTilingCompatible(u32 lhs, u32 rhs) const {
    const bool same_mode = lhs == rhs;
    const bool swapped_pair = (lhs == 0x0e && rhs == 0x0d) || (lhs == 0x0d && rhs == 0x0e);
    return same_mode || swapped_pair;
}
|
||||||
|
|
||||||
void ImageInfo::UpdateSize() {
|
void ImageInfo::UpdateSize() {
|
||||||
mips_layout.clear();
|
mips_layout.clear();
|
||||||
MipInfo mip_info{};
|
MipInfo mip_info{};
|
||||||
@ -163,7 +237,6 @@ void ImageInfo::UpdateSize() {
|
|||||||
if (props.is_block) {
|
if (props.is_block) {
|
||||||
mip_w = (mip_w + 3) / 4;
|
mip_w = (mip_w + 3) / 4;
|
||||||
mip_h = (mip_h + 3) / 4;
|
mip_h = (mip_h + 3) / 4;
|
||||||
bpp *= 16;
|
|
||||||
}
|
}
|
||||||
mip_w = std::max(mip_w, 1u);
|
mip_w = std::max(mip_w, 1u);
|
||||||
mip_h = std::max(mip_h, 1u);
|
mip_h = std::max(mip_h, 1u);
|
||||||
|
@ -25,6 +25,11 @@ struct ImageInfo {
|
|||||||
bool IsTiled() const {
|
bool IsTiled() const {
|
||||||
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
|
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
|
||||||
}
|
}
|
||||||
|
/// Returns the image extent with width and height divided by 4 when
/// props.is_block is set; otherwise the extent is returned unchanged.
/// Depth is never scaled.
/// NOTE(review): the division truncates (>> 2), whereas UpdateSize() rounds up
/// with (x + 3) / 4 — confirm truncation is intended for sizes not divisible by 4.
Extent3D BlockDim() const {
    if (props.is_block) {
        return Extent3D{size.width >> 2, size.height >> 2, size.depth};
    }
    return Extent3D{size.width >> 0, size.height >> 0, size.depth};
}
|
||||||
|
|
||||||
bool IsBlockCoded() const;
|
bool IsBlockCoded() const;
|
||||||
bool IsPacked() const;
|
bool IsPacked() const;
|
||||||
bool IsDepthStencil() const;
|
bool IsDepthStencil() const;
|
||||||
@ -33,24 +38,8 @@ struct ImageInfo {
|
|||||||
s32 MipOf(const ImageInfo& info) const;
|
s32 MipOf(const ImageInfo& info) const;
|
||||||
s32 SliceOf(const ImageInfo& info, s32 mip) const;
|
s32 SliceOf(const ImageInfo& info, s32 mip) const;
|
||||||
|
|
||||||
/// Verifies if images are compatible for subresource merging.
|
bool IsCompatible(const ImageInfo& info) const;
|
||||||
bool IsCompatible(const ImageInfo& info) const {
|
bool IsTilingCompatible(u32 lhs, u32 rhs) const;
|
||||||
return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
|
|
||||||
num_bits == info.num_bits);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsTilingCompatible(u32 lhs, u32 rhs) const {
|
|
||||||
if (lhs == rhs) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (lhs == 0x0e && rhs == 0x0d) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (lhs == 0x0d && rhs == 0x0e) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void UpdateSize();
|
void UpdateSize();
|
||||||
|
|
||||||
|
@ -199,7 +199,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
|||||||
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
|
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
|
||||||
|
|
||||||
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
|
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
|
||||||
if (image_info.size != tex_cache_image.info.size) {
|
if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
|
||||||
|
image_info.num_bits != tex_cache_image.info.num_bits) {
|
||||||
// Very likely this kind of overlap is caused by allocation from a pool.
|
// Very likely this kind of overlap is caused by allocation from a pool.
|
||||||
if (safe_to_delete) {
|
if (safe_to_delete) {
|
||||||
FreeImage(cache_image_id);
|
FreeImage(cache_image_id);
|
||||||
@ -211,15 +212,19 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
|||||||
return {depth_image_id, -1, -1};
|
return {depth_image_id, -1, -1};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (image_info.IsBlockCoded() && !tex_cache_image.info.IsBlockCoded()) {
|
||||||
|
// Compressed view of uncompressed image with same block size.
|
||||||
|
// We need to recreate the image with compressed format and copy.
|
||||||
|
return {ExpandImage(image_info, cache_image_id), -1, -1};
|
||||||
|
}
|
||||||
|
|
||||||
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
|
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
|
||||||
image_info.guest_size <= tex_cache_image.info.guest_size) {
|
image_info.guest_size <= tex_cache_image.info.guest_size) {
|
||||||
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
|
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
|
||||||
const auto& result_image = slot_images[result_id];
|
const auto& result_image = slot_images[result_id];
|
||||||
return {
|
const bool is_compatible =
|
||||||
IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
|
IsVulkanFormatCompatible(result_image.info.pixel_format, image_info.pixel_format);
|
||||||
? result_id
|
return {is_compatible ? result_id : ImageId{}, -1, -1};
|
||||||
: ImageId{},
|
|
||||||
-1, -1};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (image_info.type == tex_cache_image.info.type &&
|
if (image_info.type == tex_cache_image.info.type &&
|
||||||
@ -299,6 +304,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
|||||||
auto& new_image = slot_images[new_image_id];
|
auto& new_image = slot_images[new_image_id];
|
||||||
|
|
||||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||||
|
RefreshImage(new_image);
|
||||||
new_image.CopyImage(src_image);
|
new_image.CopyImage(src_image);
|
||||||
|
|
||||||
if (src_image.binding.is_bound || src_image.binding.is_target) {
|
if (src_image.binding.is_bound || src_image.binding.is_target) {
|
||||||
@ -339,7 +345,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (False(flags & FindFlags::RelaxFmt) &&
|
if (False(flags & FindFlags::RelaxFmt) &&
|
||||||
(!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format) ||
|
(!IsVulkanFormatCompatible(cache_image.info.pixel_format, info.pixel_format) ||
|
||||||
(cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) {
|
(cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -511,9 +517,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
|||||||
// So this calculation should be very uncommon and reasonably fast
|
// So this calculation should be very uncommon and reasonably fast
|
||||||
// For now we'll just check up to 64 first pixels
|
// For now we'll just check up to 64 first pixels
|
||||||
const auto addr = std::bit_cast<u8*>(image.info.guest_address);
|
const auto addr = std::bit_cast<u8*>(image.info.guest_address);
|
||||||
const auto w = std::min(image.info.size.width, u32(8));
|
const u32 w = std::min(image.info.size.width, u32(8));
|
||||||
const auto h = std::min(image.info.size.height, u32(8));
|
const u32 h = std::min(image.info.size.height, u32(8));
|
||||||
const auto size = w * h * image.info.num_bits / 8;
|
// Number of bytes to hash for the sampled w x h region. num_bits is counted per
// block, so a block-coded image needs an extra shift of 4 on top of the
// bits-to-bytes shift of 3. The conditional must be parenthesised: without it,
// `3 + is_block ? 4 : 0` parses as `(3 + is_block) ? 4 : 0` and always yields 4.
const u32 size = (w * h * image.info.num_bits) >> (3 + (image.info.props.is_block ? 4 : 0));
|
||||||
const u64 hash = XXH3_64bits(addr, size);
|
const u64 hash = XXH3_64bits(addr, size);
|
||||||
if (image.hash == hash) {
|
if (image.hash == hash) {
|
||||||
image.flags &= ~ImageFlagBits::MaybeCpuDirty;
|
image.flags &= ~ImageFlagBits::MaybeCpuDirty;
|
||||||
|
@ -25,10 +25,9 @@
|
|||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
||||||
const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
|
|
||||||
switch (info.tiling_mode) {
|
switch (info.tiling_mode) {
|
||||||
case AmdGpu::TilingMode::Texture_MicroTiled:
|
case AmdGpu::TilingMode::Texture_MicroTiled:
|
||||||
switch (bpp) {
|
switch (info.num_bits) {
|
||||||
case 8:
|
case 8:
|
||||||
return &detilers[DetilerType::Micro8];
|
return &detilers[DetilerType::Micro8];
|
||||||
case 16:
|
case 16:
|
||||||
@ -43,7 +42,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
case AmdGpu::TilingMode::Texture_Volume:
|
case AmdGpu::TilingMode::Texture_Volume:
|
||||||
switch (bpp) {
|
switch (info.num_bits) {
|
||||||
case 8:
|
case 8:
|
||||||
return &detilers[DetilerType::Macro8];
|
return &detilers[DetilerType::Macro8];
|
||||||
case 32:
|
case 32:
|
||||||
@ -55,7 +54,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case AmdGpu::TilingMode::Display_MicroTiled:
|
case AmdGpu::TilingMode::Display_MicroTiled:
|
||||||
switch (bpp) {
|
switch (info.num_bits) {
|
||||||
case 64:
|
case 64:
|
||||||
return &detilers[DetilerType::Display_Micro64];
|
return &detilers[DetilerType::Display_Micro64];
|
||||||
default:
|
default:
|
||||||
@ -71,7 +70,7 @@ struct DetilerParams {
|
|||||||
u32 num_levels;
|
u32 num_levels;
|
||||||
u32 pitch0;
|
u32 pitch0;
|
||||||
u32 height;
|
u32 height;
|
||||||
u32 sizes[14];
|
std::array<u32, 16> sizes;
|
||||||
};
|
};
|
||||||
|
|
||||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||||
@ -270,13 +269,16 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
|||||||
params.height = info.size.height;
|
params.height = info.size.height;
|
||||||
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume ||
|
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume ||
|
||||||
info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) {
|
info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) {
|
||||||
ASSERT(info.resources.levels == 1);
|
if (info.resources.levels != 1) {
|
||||||
|
LOG_ERROR(Render_Vulkan, "Unexpected mipmaps for volume and display tilings {}",
|
||||||
|
info.resources.levels);
|
||||||
|
}
|
||||||
const auto tiles_per_row = info.pitch / 8u;
|
const auto tiles_per_row = info.pitch / 8u;
|
||||||
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
|
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
|
||||||
params.sizes[0] = tiles_per_row;
|
params.sizes[0] = tiles_per_row;
|
||||||
params.sizes[1] = tiles_per_slice;
|
params.sizes[1] = tiles_per_slice;
|
||||||
} else {
|
} else {
|
||||||
ASSERT(info.resources.levels <= 14);
|
ASSERT(info.resources.levels <= params.sizes.size());
|
||||||
std::memset(&params.sizes, 0, sizeof(params.sizes));
|
std::memset(&params.sizes, 0, sizeof(params.sizes));
|
||||||
for (int m = 0; m < info.resources.levels; ++m) {
|
for (int m = 0; m < info.resources.levels; ++m) {
|
||||||
params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);
|
params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);
|
||||||
@ -287,8 +289,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
|||||||
&params);
|
&params);
|
||||||
|
|
||||||
ASSERT((image_size % 64) == 0);
|
ASSERT((image_size % 64) == 0);
|
||||||
const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
|
const auto num_tiles = image_size / (64 * (info.num_bits / 8));
|
||||||
const auto num_tiles = image_size / (64 * (bpp / 8));
|
|
||||||
cmdbuf.dispatch(num_tiles, 1, 1);
|
cmdbuf.dispatch(num_tiles, 1, 1);
|
||||||
return {out_buffer.first, 0};
|
return {out_buffer.first, 0};
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user