Get rid of (s)elf loader errors

This commit is contained in:
mailwl 2024-12-13 21:45:35 +03:00
parent f587931ed3
commit 541f21d611
36 changed files with 458 additions and 96 deletions

View File

@ -89,7 +89,7 @@ jobs:
arch: amd64
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@ -143,7 +143,7 @@ jobs:
arch: amd64
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@ -201,7 +201,7 @@ jobs:
variant: sccache
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@ -265,7 +265,7 @@ jobs:
variant: sccache
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@ -312,7 +312,7 @@ jobs:
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
@ -368,7 +368,7 @@ jobs:
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- name: Configure CMake
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)

View File

@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES
SYSTEM ON
)
# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
if (MSVC)
# Silence "deprecation" warnings
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d
Subproject commit e12b6b592ce9917a85303c555259488643c56f47

View File

@ -23,7 +23,8 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
// Rounded up to an address with trailing zeroes to match the IDA Pro "ps4_module_loader" loader plugin
constexpr VAddr SYSTEM_RESERVED_MIN = 0x0800000000ULL;
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Can only comfortably reserve the first 0x7C0000000 of system reserved space.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;

View File

@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI
int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel,
u32* pQueueAvailable) {
LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId);
LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId);
return ORBIS_OK;
}

View File

@ -971,7 +971,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
}
void PS4_SYSV_ABI sceGnmFlushGarlic() {
LOG_WARNING(Lib_GnmDriver, "(STUBBED) called");
LOG_TRACE(Lib_GnmDriver, "(STUBBED) called");
}
int PS4_SYSV_ABI sceGnmGetCoredumpAddress() {

View File

@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
return handle;
}
handle = linker->LoadModule(path, true);
if (handle == -1) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
auto* module = linker->GetModule(handle);
linker->RelocateAnyImports(module);

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cerrno>
#include <cmath>
#include <cstdio>
#include "common/assert.h"
#include "common/logging/log.h"
@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou
return std::strncpy(dest, src, count);
}
/**
 * Bounded string copy exposed to guest code as strncpy_s.
 *
 * Copies at most `count` characters of `src` into `dest`, which holds `destsz` bytes, and
 * always null-terminates the result on success.
 *
 * @param dest   Destination buffer.
 * @param destsz Size of the destination buffer in bytes.
 * @param src    Source string.
 * @param count  Maximum number of characters to copy from `src`.
 * @return 0 on success; on the non-Windows path EINVAL for a null/empty destination or null
 *         source, and ERANGE when the requested characters plus terminator do not fit.
 */
int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) {
#ifdef _WIN64
    return strncpy_s(dest, destsz, src, count);
#else
    // The host C library is not assumed to provide strncpy_s here, so emulate it.
    // An unbounded strcpy would ignore both destsz and count and could overrun dest.
    if (dest == nullptr || destsz == 0) {
        return EINVAL;
    }
    if (src == nullptr) {
        dest[0] = '\0';
        return EINVAL;
    }

    // Number of characters to copy: stop at the terminator or at count, whichever is first.
    size_t length = 0;
    while (length < count && src[length] != '\0') {
        ++length;
    }

    // Room is needed for the copied characters plus the terminator.
    if (length >= destsz) {
        dest[0] = '\0';
        return ERANGE;
    }

    std::memcpy(dest, src, length);
    dest[length] = '\0';
    return 0;
#endif
}
// Appends `src` to the end of the string in `dest` and returns what std::strcat returns.
char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) {
    char* const joined = std::strcat(dest, src);
    return joined;
}
@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) {
internal_strlen);
LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strncpy);
LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strncpy_s);
LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
internal_strcat);
LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,

View File

@ -347,8 +347,10 @@ struct elf_program_id_header {
constexpr s64 DT_NULL = 0;
constexpr s64 DT_NEEDED = 0x00000001;
constexpr s64 DT_RELA = 0x00000007;
constexpr s64 DT_STRSZ = 0x00000009;
constexpr s64 DT_INIT = 0x0000000c;
constexpr s64 DT_FINI = 0x0000000d;
constexpr s64 DT_SONAME = 0x0000000e;
constexpr s64 DT_DEBUG = 0x00000015;
constexpr s64 DT_TEXTREL = 0x00000016;
constexpr s64 DT_INIT_ARRAY = 0x00000019;
@ -365,6 +367,7 @@ constexpr s64 DT_SCE_NEEDED_MODULE = 0x6100000f;
constexpr s64 DT_SCE_MODULE_ATTR = 0x61000011;
constexpr s64 DT_SCE_EXPORT_LIB = 0x61000013;
constexpr s64 DT_SCE_IMPORT_LIB = 0x61000015;
constexpr s64 DT_SCE_EXPORT_LIB_ATTR = 0x61000017;
constexpr s64 DT_SCE_IMPORT_LIB_ATTR = 0x61000019;
constexpr s64 DT_SCE_HASH = 0x61000025;
constexpr s64 DT_SCE_PLTGOT = 0x61000027;
@ -458,6 +461,20 @@ struct eh_frame_hdr {
uint32_t fde_count;
};
// Values for DT_SCE_MODULE_ATTR
// MODULE_ATTR_NONE is zero; every other attribute below has exactly one distinct bit set.
constexpr u32 MODULE_ATTR_NONE = 0x00;
constexpr u32 MODULE_ATTR_SCE_CANT_STOP = 0x01;
constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_LOAD = 0x02;
constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_START = 0x04;
constexpr u32 MODULE_ATTR_SCE_CAN_RESTART = 0x08;
constexpr u32 MODULE_ATTR_SCE_CAN_RELOCATE = 0x10;
constexpr u32 MODULE_ATTR_SCE_CANT_SHARE = 0x20;
// Values for DT_SCE_IMPORT_LIB_ATTR & DT_SCE_EXPORT_LIB_ATTR
// Single-bit flags; note that bit 0x04 has no named constant here.
constexpr u32 LIB_ATTR_AUTO_EXPORT = 0x01;
constexpr u32 LIB_ATTR_WEAK_EXPORT = 0x02;
constexpr u32 LIB_ATTR_LOOSE_IMPORT = 0x08;
namespace Core::Loader {
class Elf {

View File

@ -215,6 +215,37 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
}
break;
}
case PT_SCE_LIBVERSION:
// contains list of used libraries and binary version in format:
// u8 size; name[size-5]; ':'; u32 version
LOG_INFO(Core_Linker, "PT_SCE_LIBVERSION unused blob: offset = {:#x}, size = {}",
elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
break;
case PT_SCE_MODULE_PARAM:
// contains unknown struct, first u32 - sizeof this struct, currently unused
LOG_INFO(Core_Linker, "PT_SCE_MODULE_PARAM unused blob: offset = {:#x}, size = {}",
elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
break;
case PT_SCE_COMMENT:
// Contains path to compiled executable, unused
LOG_INFO(Core_Linker, "PT_SCE_COMMENT unused blob: offset = {:#x}, size = {}",
elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
break;
case PT_INTERP: {
std::vector<char> interpeter_path(elf_pheader[i].p_filesz);
const VAddr segment_addr = std::bit_cast<VAddr>(interpeter_path.data());
elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
LOG_INFO(Core_Linker, "Interpreter: {}", interpeter_path.data());
break;
}
case PT_NOTE:
if (elf_pheader[i].p_offset && elf_pheader[i].p_filesz) {
std::vector<char> note(elf_pheader[i].p_filesz);
const VAddr segment_addr = std::bit_cast<VAddr>(note.data());
elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
LOG_INFO(Core_Linker, "note: {}", note.data());
}
break;
default:
LOG_ERROR(Core_Linker, "Unimplemented type {}", header_type);
}
@ -232,6 +263,46 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
}
}
/**
 * Returns a human-readable name for a DT_SCE_MODULE_ATTR value, for logging.
 *
 * Only values equal to exactly one MODULE_ATTR_* constant are recognized; anything else
 * (including a combination of several flags) yields "UNKNOWN".
 *
 * @param module_attr Module attribute value.
 * @return Name of the attribute without the MODULE_ATTR_ prefix, or "UNKNOWN".
 */
static std::string_view ModuleAttrString(u32 module_attr) {
    switch (module_attr) {
    case MODULE_ATTR_NONE:
        return "NONE";
    case MODULE_ATTR_SCE_CANT_STOP:
        return "SCE_CANT_STOP";
    case MODULE_ATTR_SCE_EXCLUSIVE_LOAD:
        return "SCE_EXCLUSIVE_LOAD";
    case MODULE_ATTR_SCE_EXCLUSIVE_START:
        return "SCE_EXCLUSIVE_START";
    case MODULE_ATTR_SCE_CAN_RESTART:
        return "SCE_CAN_RESTART";
    case MODULE_ATTR_SCE_CAN_RELOCATE:
        // Previously reported as "SCE_CAN_RESTART", which made the two attributes
        // indistinguishable in the log.
        return "SCE_CAN_RELOCATE";
    case MODULE_ATTR_SCE_CANT_SHARE:
        return "SCE_CANT_SHARE";
    default:
        return "UNKNOWN";
    }
}
/**
 * Builds a printable list of the LIB_ATTR_* flags set in `lib_attr`.
 *
 * Flag names are emitted in the fixed order AUTO_EXPORT, WEAK_EXPORT, LOOSE_IMPORT and
 * separated by '|'. Bits without a named flag are ignored; if no named flag is set the
 * result is an empty string.
 */
static std::string LibAttrString(u32 lib_attr) {
    std::string joined;
    // Appends one flag name, inserting the separator before every name but the first.
    const auto append_flag = [&joined](std::string_view flag_name) {
        if (!joined.empty()) {
            joined += "|";
        }
        joined += flag_name;
    };

    if (lib_attr & LIB_ATTR_AUTO_EXPORT) {
        append_flag("AUTO_EXPORT");
    }
    if (lib_attr & LIB_ATTR_WEAK_EXPORT) {
        append_flag("WEAK_EXPORT");
    }
    if (lib_attr & LIB_ATTR_LOOSE_IMPORT) {
        append_flag("LOOSE_IMPORT");
    }
    return joined;
}
void Module::LoadDynamicInfo() {
for (const auto* dyn = reinterpret_cast<elf_dynamic*>(m_dynamic.data()); dyn->d_tag != DT_NULL;
dyn++) {
@ -326,7 +397,8 @@ void Module::LoadDynamicInfo() {
dynamic_info.flags = dyn->d_un.d_val;
// This value should always be DF_TEXTREL (0x04)
if (dynamic_info.flags != 0x04) {
LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check!");
LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check! Current: {:#x}",
dynamic_info.flags);
}
break;
case DT_NEEDED:
@ -338,6 +410,14 @@ void Module::LoadDynamicInfo() {
LOG_ERROR(Core_Linker, "DT_NEEDED str table is not loaded should check!");
}
break;
case DT_SONAME:
if (dynamic_info.str_table) {
LOG_INFO(Core_Linker, "DT_SONAME value = {}",
dynamic_info.str_table + dyn->d_un.d_val);
} else {
LOG_ERROR(Core_Linker, "DT_SONAME str table is not loaded should check!");
}
break;
case DT_SCE_NEEDED_MODULE: {
ModuleInfo& info = dynamic_info.import_modules.emplace_back();
info.value = dyn->d_un.d_val;
@ -357,14 +437,18 @@ void Module::LoadDynamicInfo() {
// the given app. How exactly this is generated isn't known, however it is not necessary
// to have a valid fingerprint. While an invalid fingerprint will cause a warning to be
// printed to the kernel log, the ELF will still load and run.
LOG_INFO(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val);
LOG_DEBUG(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val);
std::memcpy(info.fingerprint.data(), &dyn->d_un.d_val, sizeof(SCE_DBG_NUM_FINGERPRINT));
break;
case DT_SCE_IMPORT_LIB_ATTR:
// The upper 32-bits should contain the module index multiplied by 0x10000. The lower
// 32-bits should be a constant 0x9.
LOG_INFO(Core_Linker, "unsupported DT_SCE_IMPORT_LIB_ATTR value = ......: {:#018x}",
dyn->d_un.d_val);
// 32-bits library attributes flag.
LOG_DEBUG(Core_Linker, "DT_SCE_IMPORT_LIB_ATTR Library ID: {:#02x}, value = {}",
dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
break;
case DT_SCE_EXPORT_LIB_ATTR:
LOG_DEBUG(Core_Linker, "DT_SCE_EXPORT_LIB_ATTR Library ID: {:#02x}, value = {}",
dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
break;
case DT_SCE_ORIGINAL_FILENAME:
dynamic_info.filename = dynamic_info.str_table + dyn->d_un.d_val;
@ -379,8 +463,8 @@ void Module::LoadDynamicInfo() {
break;
};
case DT_SCE_MODULE_ATTR:
LOG_INFO(Core_Linker, "unsupported DT_SCE_MODULE_ATTR value = ..........: {:#018x}",
dyn->d_un.d_val);
LOG_INFO(Core_Linker, "DT_SCE_MODULE_ATTR value = {}",
ModuleAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
break;
case DT_SCE_EXPORT_LIB: {
LibraryInfo& info = dynamic_info.export_libs.emplace_back();
@ -389,6 +473,10 @@ void Module::LoadDynamicInfo() {
info.enc_id = EncodeId(info.id);
break;
}
case DT_STRSZ:
LOG_INFO(Core_Linker, "unsupported DT_STRSZ value = ..........: {:#018x}",
dyn->d_un.d_val);
break;
default:
LOG_INFO(Core_Linker, "unsupported dynamic tag ..........: {:#018x}", dyn->d_tag);
}

View File

@ -266,7 +266,7 @@ void Emulator::Run(const std::filesystem::path& file) {
}
void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
constexpr std::array<SysModules, 10> ModulesToLoad{
constexpr std::array<SysModules, 13> ModulesToLoad{
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
{"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber},
{"libSceUlt.sprx", nullptr},
@ -276,7 +276,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
{"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc},
{"libSceCesCs.sprx", nullptr}}};
{"libSceCesCs.sprx", nullptr},
{"libSceFont.sprx", nullptr},
{"libSceFontFt.sprx", nullptr},
{"libSceFreeTypeOt.sprx", nullptr}}};
std::vector<std::filesystem::path> found_modules;
const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir);

View File

@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
}
// Emits a 32-bit floating-point division a / b and passes the result through Decorate.
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id quotient = ctx.OpFDiv(ctx.F32[1], a, b);
    return Decorate(ctx, inst, quotient);
}
// Emits a 64-bit floating-point division a / b and passes the result through Decorate.
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id quotient = ctx.OpFDiv(ctx.F64[1], a, b);
    return Decorate(ctx, inst, quotient);
}
// Emits a negation of a 16-bit floating-point value.
Id EmitFPNeg16(EmitContext& ctx, Id value) {
    const Id negated = ctx.OpFNegate(ctx.F16[1], value);
    return negated;
}
@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
return ctx.OpTrunc(ctx.F64[1], value);
}
Id EmitFPFract(EmitContext& ctx, Id value) {
// Emits the Fract operation on a 32-bit floating-point value.
Id EmitFPFract32(EmitContext& ctx, Id value) {
    const Id fraction = ctx.OpFract(ctx.F32[1], value);
    return fraction;
}
// Emits the Fract operation on a 64-bit floating-point value.
Id EmitFPFract64(EmitContext& ctx, Id value) {
    const Id fraction = ctx.OpFract(ctx.F64[1], value);
    return fraction;
}
// Decomposes a 32-bit float with FrexpStruct and returns member 0 (the F32 member) of the
// resulting struct.
Id EmitFPFrexpSig32(EmitContext& ctx, Id value) {
    const Id decomposed = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
    const Id significand = ctx.OpCompositeExtract(ctx.F32[1], decomposed, 0);
    return significand;
}
// Decomposes a 64-bit float with FrexpStruct and returns member 0 (the F64 member) of the
// resulting struct.
Id EmitFPFrexpSig64(EmitContext& ctx, Id value) {
    const Id decomposed = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
    const Id significand = ctx.OpCompositeExtract(ctx.F64[1], decomposed, 0);
    return significand;
}
// Decomposes a 32-bit float with FrexpStruct and returns member 1 (the U32 member) of the
// resulting struct.
Id EmitFPFrexpExp32(EmitContext& ctx, Id value) {
    const Id decomposed = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
    const Id exponent = ctx.OpCompositeExtract(ctx.U32[1], decomposed, 1);
    return exponent;
}
// Decomposes a 64-bit float with FrexpStruct and returns member 1 (the U32 member) of the
// resulting struct.
Id EmitFPFrexpExp64(EmitContext& ctx, Id value) {
    const Id decomposed = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
    const Id exponent = ctx.OpCompositeExtract(ctx.U32[1], decomposed, 1);
    return exponent;
}
// Emits an ordered floating-point equality comparison of lhs and rhs.
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
    const Id is_equal = ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs);
    return is_equal;
}

View File

@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPNeg16(EmitContext& ctx, Id value);
Id EmitFPNeg32(EmitContext& ctx, Id value);
Id EmitFPNeg64(EmitContext& ctx, Id value);
@ -220,7 +222,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
Id EmitFPTrunc16(EmitContext& ctx, Id value);
Id EmitFPTrunc32(EmitContext& ctx, Id value);
Id EmitFPTrunc64(EmitContext& ctx, Id value);
Id EmitFPFract(EmitContext& ctx, Id value);
Id EmitFPFract32(EmitContext& ctx, Id value);
Id EmitFPFract64(EmitContext& ctx, Id value);
Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);

View File

@ -147,6 +147,10 @@ void EmitContext::DefineArithmeticTypes() {
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32");
if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64");
}
}
void EmitContext::DefineInterfaces() {

View File

@ -148,6 +148,8 @@ public:
Id full_result_i32x2;
Id full_result_u32x2;
Id frexp_result_f32;
Id frexp_result_f64;
Id pi_x2;

View File

@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) {
const auto& exp = inst.control.exp;
const IR::Attribute attrib{exp.target};
if (attrib == IR::Attribute::Depth && exp.en != 1) {
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
return;
}
const std::array vsrc = {
IR::VectorReg(inst.src[0].code),
IR::VectorReg(inst.src[1].code),

View File

@ -200,6 +200,11 @@ public:
void V_BFREV_B32(const GcnInst& inst);
void V_FFBH_U32(const GcnInst& inst);
void V_FFBL_B32(const GcnInst& inst);
void V_FREXP_EXP_I32_F64(const GcnInst& inst);
void V_FREXP_MANT_F64(const GcnInst& inst);
void V_FRACT_F64(const GcnInst& inst);
void V_FREXP_EXP_I32_F32(const GcnInst& inst);
void V_FREXP_MANT_F32(const GcnInst& inst);
void V_MOVRELD_B32(const GcnInst& inst);
void V_MOVRELS_B32(const GcnInst& inst);
void V_MOVRELSD_B32(const GcnInst& inst);

View File

@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_FFBH_U32(inst);
case Opcode::V_FFBL_B32:
return V_FFBL_B32(inst);
case Opcode::V_FREXP_EXP_I32_F64:
return V_FREXP_EXP_I32_F64(inst);
case Opcode::V_FREXP_MANT_F64:
return V_FREXP_MANT_F64(inst);
case Opcode::V_FRACT_F64:
return V_FRACT_F64(inst);
case Opcode::V_FREXP_EXP_I32_F32:
return V_FREXP_EXP_I32_F32(inst);
case Opcode::V_FREXP_MANT_F32:
return V_FREXP_MANT_F32(inst);
case Opcode::V_MOVRELD_B32:
return V_MOVRELD_B32(inst);
case Opcode::V_MOVRELS_B32:
@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
void Translator::V_FRACT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.Fract(src0));
SetDst(inst.dst[0], ir.FPFract(src0));
}
void Translator::V_TRUNC_F32(const GcnInst& inst) {
@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FindILsb(src0));
}
void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}
void Translator::V_FREXP_MANT_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst64(inst.dst[0], ir.FPFrexpSig(src0));
}
// V_FRACT_F64: reads src[0] as a 64-bit float, applies FPFract and writes the 64-bit
// result to dst[0].
void Translator::V_FRACT_F64(const GcnInst& inst) {
    // The operand is fetched as IR::F64 and the result is stored with SetDst64, so the local
    // must be IR::F64 as well; holding it in an IR::F32 mismatched the value's actual type.
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPFract(src0));
}
void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}
void Translator::V_FREXP_MANT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
SetDst(inst.dst[0], ir.FPFrexpSig(src0));
}
void Translator::V_MOVRELD_B32(const GcnInst& inst) {
const IR::U32 src_val{GetSrc(inst.src[0])};
u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);

View File

@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
info.has_offset.Assign(flags.test(MimgModifier::Offset));
info.has_lod.Assign(flags.any(MimgModifier::Lod));
info.is_array.Assign(mimg.da);
info.is_unnormalized.Assign(mimg.unrm);
if (gather) {
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);

View File

@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
}
}
// Emits a floating-point division a / b. Both operands must have the same type; F32 and
// F64 select FPDiv32 and FPDiv64 respectively, any other type is reported as invalid.
F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
    const auto lhs_type = a.Type();
    const auto rhs_type = b.Type();
    if (lhs_type != rhs_type) {
        UNREACHABLE_MSG("Mismatching types {} and {}", lhs_type, rhs_type);
    }
    if (lhs_type == Type::F32) {
        return Inst<F32>(Opcode::FPDiv32, a, b);
    }
    if (lhs_type == Type::F64) {
        return Inst<F64>(Opcode::FPDiv64, a, b);
    }
    ThrowInvalidType(lhs_type);
}
F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
if (a.Type() != b.Type() || a.Type() != c.Type()) {
UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
@ -855,8 +869,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
}
}
F32 IREmitter::Fract(const F32& value) {
return Inst<F32>(Opcode::FPFract, value);
// Emits the fract operation for an F32 or F64 value (FPFract32 / FPFract64); any other
// operand type is reported as invalid.
F32F64 IREmitter::FPFract(const F32F64& value) {
    const auto operand_type = value.Type();
    if (operand_type == Type::F32) {
        return Inst<F32>(Opcode::FPFract32, value);
    }
    if (operand_type == Type::F64) {
        return Inst<F64>(Opcode::FPFract64, value);
    }
    ThrowInvalidType(operand_type);
}
// Emits the frexp-significand operation for an F32 or F64 value (FPFrexpSig32 /
// FPFrexpSig64); the result has the same width as the operand. Any other operand type is
// reported as invalid.
F32F64 IREmitter::FPFrexpSig(const F32F64& value) {
    const auto operand_type = value.Type();
    if (operand_type == Type::F32) {
        return Inst<F32>(Opcode::FPFrexpSig32, value);
    }
    if (operand_type == Type::F64) {
        return Inst<F64>(Opcode::FPFrexpSig64, value);
    }
    ThrowInvalidType(operand_type);
}
// Emits the frexp-exponent operation for an F32 or F64 value (FPFrexpExp32 /
// FPFrexpExp64); the result is always a U32. Any other operand type is reported as invalid.
U32 IREmitter::FPFrexpExp(const F32F64& value) {
    const auto operand_type = value.Type();
    if (operand_type == Type::F32) {
        return Inst<U32>(Opcode::FPFrexpExp32, value);
    }
    if (operand_type == Type::F64) {
        return Inst<U32>(Opcode::FPFrexpExp64, value);
    }
    ThrowInvalidType(operand_type);
}
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {

View File

@ -158,6 +158,7 @@ public:
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] F32F64 FPAbs(const F32F64& value);
@ -179,7 +180,9 @@ public:
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
[[nodiscard]] F32 Fract(const F32& value);
[[nodiscard]] F32F64 FPFract(const F32F64& value);
[[nodiscard]] F32F64 FPFrexpSig(const F32F64& value);
[[nodiscard]] U32 FPFrexpExp(const F32F64& value);
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);

View File

@ -184,6 +184,8 @@ OPCODE(FPMin32, F32, F32,
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPDiv32, F32, F32, F32, )
OPCODE(FPDiv64, F64, F64, F64, )
OPCODE(FPNeg32, F32, F32, )
OPCODE(FPNeg64, F64, F64, )
OPCODE(FPRecip32, F32, F32, )
@ -208,7 +210,12 @@ OPCODE(FPCeil32, F32, F32,
OPCODE(FPCeil64, F64, F64, )
OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, F64, F64, )
OPCODE(FPFract, F32, F32, )
OPCODE(FPFract32, F32, F32, )
OPCODE(FPFract64, F64, F64, )
OPCODE(FPFrexpSig32, F32, F32, )
OPCODE(FPFrexpSig64, F64, F64, )
OPCODE(FPFrexpExp32, U32, F32, )
OPCODE(FPFrexpExp64, U32, F64, )
OPCODE(FPOrdEqual32, U1, F32, F32, )
OPCODE(FPOrdEqual64, U1, F64, F64, )

View File

@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) {
}
}
// Builds a four-component vector from `texel` according to the per-channel swizzle reported
// by `sharp.GetSwizzle(i)` for i in [0, 4).
//
// Zero and One selectors become the float immediates 0.f and 1.f; Red, Green, Blue and Alpha
// extract components 0..3 of `texel`. Any other selector hits UNREACHABLE().
// Components are produced in channel order before the final composite is constructed.
IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
    boost::container::static_vector<IR::Value, 4> swizzled;
    for (u32 channel = 0; channel < 4; ++channel) {
        const auto selector = sharp.GetSwizzle(channel);
        if (selector == AmdGpu::CompSwizzle::Zero) {
            swizzled.emplace_back(ir.Imm32(0.f));
        } else if (selector == AmdGpu::CompSwizzle::One) {
            swizzled.emplace_back(ir.Imm32(1.f));
        } else if (selector == AmdGpu::CompSwizzle::Red) {
            swizzled.emplace_back(ir.CompositeExtract(texel, 0));
        } else if (selector == AmdGpu::CompSwizzle::Green) {
            swizzled.emplace_back(ir.CompositeExtract(texel, 1));
        } else if (selector == AmdGpu::CompSwizzle::Blue) {
            swizzled.emplace_back(ir.CompositeExtract(texel, 2));
        } else if (selector == AmdGpu::CompSwizzle::Alpha) {
            swizzled.emplace_back(ir.CompositeExtract(texel, 3));
        } else {
            UNREACHABLE();
        }
    }
    return ir.CompositeConstruct(swizzled[0], swizzled[1], swizzled[2], swizzled[3]);
}
class Descriptors {
public:
explicit Descriptors(Info& info_)
@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
// Apply dst_sel swizzle on formatted buffer instructions
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
} else {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
}
}
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors, const IR::Inst* producer,
const u32 image_binding, const AmdGpu::Image& image) {
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
const u32 sampler_binding = [&] {
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
return descriptors.Add(SamplerResource{
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
return descriptors.Add(SamplerResource{
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -539,28 +580,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}
}();
const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
// Query dimensions of image if needed for normalization.
// We can't use the image sharp because it could be bound to a different image later.
const auto dimensions =
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
: IR::Value{};
const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
const auto coord = get_addr_reg(idx);
if (unnormalized) {
// Normalize the coordinate for sampling, dividing by its corresponding dimension.
return ir.FPDiv(coord,
ir.BitCast<IR::F32>(IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
}
return coord;
};
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
const IR::Value coords = [&] -> IR::Value {
switch (image.GetType()) {
case AmdGpu::ImageType::Color1D: // x
addr_reg = addr_reg + 1;
return get_addr_reg(addr_reg - 1);
return get_coord(addr_reg - 1, 0);
case AmdGpu::ImageType::Color1DArray: // x, slice
[[fallthrough]];
case AmdGpu::ImageType::Color2D: // x, y
addr_reg = addr_reg + 2;
return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
case AmdGpu::ImageType::Color2DArray: // x, y, slice
[[fallthrough]];
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
[[fallthrough]];
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1));
case AmdGpu::ImageType::Color3D: // x, y, z
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
get_addr_reg(addr_reg - 1));
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_coord(addr_reg - 1, 2));
case AmdGpu::ImageType::Cube: // x, y, face
addr_reg = addr_reg + 3;
return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
default:
UNREACHABLE();
@ -711,6 +770,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
}();
inst.SetArg(1, coords);
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2)));
}
if (inst_info.has_lod) {
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&

View File

@ -40,7 +40,8 @@ union TextureInstInfo {
BitField<6, 2, u32> gather_comp;
BitField<8, 1, u32> has_derivatives;
BitField<9, 1, u32> is_array;
BitField<10, 1, u32> is_gather;
BitField<10, 1, u32> is_unnormalized;
BitField<11, 1, u32> is_gather;
};
union BufferInstInfo {

View File

@ -31,6 +31,7 @@ struct BufferSpecialization {
// Per-texture-buffer properties recorded in a shader stage specialization.
// Compared member-wise via the defaulted three-way comparison.
struct TextureBufferSpecialization {
// Filled from AmdGpu::IsInteger() on the bound buffer's number format.
bool is_integer = false;
// Filled from the bound buffer's DstSelect().
u32 dst_select = 0;
auto operator<=>(const TextureBufferSpecialization&) const = default;
};
@ -38,8 +39,12 @@ struct TextureBufferSpecialization {
// Specialization state tracked for each bound image.
struct ImageSpecialization {
// Image type reported by the bound descriptor (GetBoundType()).
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
// Set from AmdGpu::IsInteger() on the bound image's number format.
bool is_integer = false;
// Packed destination select; only filled in for storage images, 0 otherwise.
u32 dst_select = 0;
// NOTE(review): this defaulted comparison looks at dst_select unconditionally, while
// operator== below skips it when it is 0 - confirm both are meant to coexist.
auto operator<=>(const ImageSpecialization&) const = default;
// Equality on type and is_integer; dst_select is compared only when THIS side's
// dst_select is non-zero. The check is therefore not symmetric: a == b can hold
// while b == a does not, if only b carries a non-zero dst_select.
bool operator==(const ImageSpecialization& other) const {
return type == other.type && is_integer == other.is_integer &&
(dst_select != 0 ? dst_select == other.dst_select : true);
}
};
struct FMaskSpecialization {
@ -49,6 +54,12 @@ struct FMaskSpecialization {
auto operator<=>(const FMaskSpecialization&) const = default;
};
// Specialization state tracked for each bound sampler.
struct SamplerSpecialization {
    // Mirrors the bound sampler's force_unnormalized field.
    bool force_unnormalized{false};

    // Member-wise comparison.
    auto operator<=>(const SamplerSpecialization&) const = default;
};
/**
* Alongside runtime information, this structure also checks bound resources
* for compatibility. Can be used as a key for storing shader permutations.
@ -67,6 +78,7 @@ struct StageSpecialization {
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
boost::container::small_vector<SamplerSpecialization, 16> samplers;
Backend::Bindings start{};
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
@ -96,17 +108,25 @@ struct StageSpecialization {
ForEachSharp(binding, tex_buffers, info->texture_buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect();
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetBoundType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
if (desc.is_storage) {
spec.dst_select = sharp.DstSelect();
}
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.width = sharp.width;
spec.height = sharp.height;
});
ForEachSharp(samplers, info->samplers,
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
spec.force_unnormalized = sharp.force_unnormalized;
});
}
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
@ -175,6 +195,11 @@ struct StageSpecialization {
return false;
}
}
for (u32 i = 0; i < samplers.size(); i++) {
if (samplers[i] != other.samplers[i]) {
return false;
}
}
return true;
}
};

View File

@ -431,6 +431,10 @@ struct Liverpool {
return u64(z_read_base) << 8;
}
// Returns stencil_read_base widened to 64 bits and shifted left by 8.
u64 StencilAddress() const {
    const auto base = static_cast<u64>(stencil_read_base);
    return base << 8;
}
// Returns 1 shifted left by z_info.num_samples, i.e. the field is treated as the
// log2 of the sample count.
u32 NumSamples() const {
return 1u << z_info.num_samples; // spec doesn't say it is a log2
}

View File

@ -52,6 +52,10 @@ struct Buffer {
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
}
// Packs the four destination selectors into a single value:
// x at bit 0, y at bit 3, z at bit 6 and w at bit 9.
u32 DstSelect() const {
    return (dst_sel_w << 9) | (dst_sel_z << 6) | (dst_sel_y << 3) | dst_sel_x;
}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
return static_cast<CompSwizzle>(select[comp]);
@ -204,6 +208,11 @@ struct Image {
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
}
// Returns the destination selector for component index `comp`
// (0 = x, 1 = y, 2 = z, 3 = w) converted to CompSwizzle.
// `comp` is not range-checked; callers must pass a value below 4.
CompSwizzle GetSwizzle(u32 comp) const noexcept {
    const std::array selectors{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
    const auto raw = selectors[comp];
    return static_cast<CompSwizzle>(raw);
}
static char SelectComp(u32 sel) {
switch (sel) {
case 0:

View File

@ -699,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
default:
break;
}
} else if (comp_swap_reverse) {
switch (base_format) {
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eA8B8G8R8UnormPack32;
case vk::Format::eR8G8B8A8Srgb:
return vk::Format::eA8B8G8R8SrgbPack32;
default:
break;
}
}
return base_format;
}

View File

@ -616,18 +616,24 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{tsharp, image_desc});
image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id);
if (image.binding.is_bound) {
auto* image = &texture_cache.GetImage(image_id);
if (image->depth_id) {
// If this image has an associated depth image, it's a stencil attachment.
// Redirect the access to the actual depth-stencil buffer.
image_id = image->depth_id;
image = &texture_cache.GetImage(image_id);
}
if (image->binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we need
// to force general layout on it.
image.binding.force_general |= image_desc.is_storage;
image->binding.force_general |= image_desc.is_storage;
}
if (image.binding.is_target) {
if (image->binding.is_target) {
// The image is already bound as target. Since we read and output to it need to force
// general layout too.
image.binding.force_general = 1u;
image->binding.force_general = 1u;
}
image.binding.is_bound = 1u;
image->binding.is_bound = 1u;
}
// Second pass to re-bind images that were updated after binding

View File

@ -145,8 +145,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()} {
if (info.pixel_format == vk::Format::eUndefined) {
return;
}
mip_hashes.resize(info.resources.levels);
ASSERT(info.pixel_format != vk::Format::eUndefined);
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
// the texture cache should re-create the resource with the usage requested
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |

View File

@ -92,6 +92,10 @@ struct Image {
return image_view_ids[std::distance(image_view_infos.begin(), it)];
}
// Stores `image_id` in depth_id, linking this image to the given depth image.
void AssociateDepth(ImageId image_id) {
    this->depth_id = image_id;
}
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
@ -116,6 +120,7 @@ struct Image {
VAddr track_addr_end = 0;
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
ImageId depth_id{};
// Resource state tracking
struct {

View File

@ -298,6 +298,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
stencil_addr = buffer.StencilAddress();
stencil_size = pitch * size.height * sizeof(u8);
guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
guest_size_bytes = depth_slice_sz * num_slices;

View File

@ -69,7 +69,7 @@ struct ImageInfo {
} props{}; // Surface properties with impact on various calculation factors
vk::Format pixel_format = vk::Format::eUndefined;
vk::ImageType type = vk::ImageType::e1D;
vk::ImageType type = vk::ImageType::e2D;
SubresourceExtent resources;
Extent3D size{1, 1, 1};
u32 num_bits{};

View File

@ -50,34 +50,6 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
}
}
// Checks whether the packed destination select `dst_sel` equals the one fixed
// pattern accepted for a format with `num_components` components (1 to 4).
// Any other component count yields false.
// Selector layout (x in the lowest 3 bits, then y, z, w) follows DstSelect().
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
    switch (num_components) {
    case 1:
        return dst_sel == 0b001'000'000'100;
    case 2:
        return dst_sel == 0b001'000'101'100;
    case 3:
        return dst_sel == 0b001'110'101'100;
    case 4:
        return dst_sel == 0b111'110'101'100;
    default:
        return false;
    }
}
// Attempts to express a destination swizzle by switching to a differently
// ordered format. Only the BGRA selector pattern is recognised, and only for
// the five R8G8B8A8 formats listed below; every other input is returned as is.
vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
    // Packed selector pattern for BGRA.
    constexpr u32 bgra_select = 0b111100101110;
    if (dst_sel != bgra_select) {
        return format;
    }
    switch (format) {
    case vk::Format::eR8G8B8A8Unorm:
        return vk::Format::eB8G8R8A8Unorm;
    case vk::Format::eR8G8B8A8Snorm:
        return vk::Format::eB8G8R8A8Snorm;
    case vk::Format::eR8G8B8A8Uint:
        return vk::Format::eB8G8R8A8Uint;
    case vk::Format::eR8G8B8A8Sint:
        return vk::Format::eB8G8R8A8Sint;
    case vk::Format::eR8G8B8A8Srgb:
        return vk::Format::eB8G8R8A8Srgb;
    default:
        // No BGRA counterpart handled for this format.
        return format;
    }
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept
: is_storage{desc.is_storage} {
const auto dfmt = image.GetDataFmt();
@ -120,17 +92,6 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
}
// Check for unfortunate case of storage images being swizzled
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
const u32 dst_sel = image.DstSelect();
if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
format = new_format;
return;
}
LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps,
image.DstSelectName());
}
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept {
@ -170,7 +131,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eDepth;
}
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) {
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) {
format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eStencil;
}

View File

@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
.minLod = sampler.MinLod(),
.maxLod = sampler.MaxLod(),
.borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
.unnormalizedCoordinates = bool(sampler.force_unnormalized),
.unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
};
auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);
ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}",

View File

@ -443,6 +443,27 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
}
}
// If there is a stencil attachment, link depth and stencil.
if (desc.info.stencil_addr != 0) {
ImageId stencil_id{};
ForEachImageInRegion(desc.info.stencil_addr, desc.info.stencil_size,
[&](ImageId image_id, Image& image) {
if (image.info.guest_address == desc.info.stencil_addr) {
stencil_id = image_id;
}
});
if (!stencil_id) {
ImageInfo info{};
info.guest_address = desc.info.stencil_addr;
info.guest_size_bytes = desc.info.stencil_size;
info.size = desc.info.size;
stencil_id = slot_images.insert(instance, scheduler, info);
RegisterImage(stencil_id);
}
Image& image = slot_images[stencil_id];
image.AssociateDepth(image_id);
}
return RegisterImageView(image_id, desc.view_info);
}