mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
Get rid of (s)elf loader errors
This commit is contained in:
parent
f587931ed3
commit
541f21d611
.github/workflows/build.yml (vendored) · 12 changes
@@ -89,7 +89,7 @@ jobs:
           arch: amd64

       - name: Configure CMake
-        run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@@ -143,7 +143,7 @@ jobs:
           arch: amd64

       - name: Configure CMake
-        run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
@@ -201,7 +201,7 @@ jobs:
           variant: sccache

       - name: Configure CMake
-        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@@ -265,7 +265,7 @@ jobs:
           variant: sccache

       - name: Configure CMake
-        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu)
@@ -312,7 +312,7 @@ jobs:
           key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}

       - name: Configure CMake
-        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
@@ -368,7 +368,7 @@ jobs:
           key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}

       - name: Configure CMake
-        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

       - name: Build
         run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
externals/CMakeLists.txt (vendored) · 3 changes
@@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES
     SYSTEM ON
 )

+# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled
+set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
+
 if (MSVC)
     # Silence "deprecation" warnings
     add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
externals/sirit (vendored) · 2 changes
@@ -1 +1 @@
-Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d
+Subproject commit e12b6b592ce9917a85303c555259488643c56f47
@@ -23,7 +23,8 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;

 constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
 constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
-constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
+// Align to a run of zeroes, to fit the IDA Pro "ps4_module_loader" loader plugin
+constexpr VAddr SYSTEM_RESERVED_MIN = 0x0800000000ULL;
 #if defined(__APPLE__) && defined(ARCH_X86_64)
 // Can only comfortably reserve the first 0x7C0000000 of system reserved space.
 constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
@@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI

 int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel,
                                              u32* pQueueAvailable) {
-    LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId);
+    LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId);
     return ORBIS_OK;
 }
@@ -971,7 +971,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
 }

 void PS4_SYSV_ABI sceGnmFlushGarlic() {
-    LOG_WARNING(Lib_GnmDriver, "(STUBBED) called");
+    LOG_TRACE(Lib_GnmDriver, "(STUBBED) called");
 }

 int PS4_SYSV_ABI sceGnmGetCoredumpAddress() {
@@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
         return handle;
     }
     handle = linker->LoadModule(path, true);
+    if (handle == -1) {
+        return ORBIS_KERNEL_ERROR_ESRCH;
+    }
     auto* module = linker->GetModule(handle);
     linker->RelocateAnyImports(module);
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <cmath>
 #include <cstdio>

 #include "common/assert.h"
 #include "common/logging/log.h"

@@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou
     return std::strncpy(dest, src, count);
 }

+int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) {
+#ifdef _WIN64
+    return strncpy_s(dest, destsz, src, count);
+#else
+    std::strcpy(dest, src);
+    return 0;
+#endif
+}
+
 char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) {
     return std::strcat(dest, src);
 }

@@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) {
                  internal_strlen);
     LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
                  internal_strncpy);
+    LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
+                 internal_strncpy_s);
     LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
                  internal_strcat);
     LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1,
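Note that the non-Windows fallback of internal_strncpy_s above ignores both destsz and count. A bounds-respecting sketch of the same fallback (an assumption based on C11 strncpy_s semantics, not what this commit ships) could look like:

    #include <algorithm>
    #include <cstring>

    // Hypothetical bounded fallback: copies at most count chars, always terminates,
    // and reports failure instead of overflowing dest (C11-style nonzero return).
    int strncpy_s_fallback(char* dest, std::size_t destsz, const char* src, std::size_t count) {
        if (dest == nullptr || src == nullptr || destsz == 0) {
            return 22; // constraint violation, roughly EINVAL
        }
        const std::size_t n = std::min(count, destsz - 1);
        std::strncpy(dest, src, n);
        dest[n] = '\0'; // std::strncpy does not guarantee termination on truncation
        return 0;
    }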
@@ -347,8 +347,10 @@ struct elf_program_id_header {
 constexpr s64 DT_NULL = 0;
 constexpr s64 DT_NEEDED = 0x00000001;
 constexpr s64 DT_RELA = 0x00000007;
+constexpr s64 DT_STRSZ = 0x00000009;
 constexpr s64 DT_INIT = 0x0000000c;
 constexpr s64 DT_FINI = 0x0000000d;
+constexpr s64 DT_SONAME = 0x0000000e;
 constexpr s64 DT_DEBUG = 0x00000015;
 constexpr s64 DT_TEXTREL = 0x00000016;
 constexpr s64 DT_INIT_ARRAY = 0x00000019;

@@ -365,6 +367,7 @@ constexpr s64 DT_SCE_NEEDED_MODULE = 0x6100000f;
 constexpr s64 DT_SCE_MODULE_ATTR = 0x61000011;
 constexpr s64 DT_SCE_EXPORT_LIB = 0x61000013;
 constexpr s64 DT_SCE_IMPORT_LIB = 0x61000015;
+constexpr s64 DT_SCE_EXPORT_LIB_ATTR = 0x61000017;
 constexpr s64 DT_SCE_IMPORT_LIB_ATTR = 0x61000019;
 constexpr s64 DT_SCE_HASH = 0x61000025;
 constexpr s64 DT_SCE_PLTGOT = 0x61000027;

@@ -458,6 +461,20 @@ struct eh_frame_hdr {
     uint32_t fde_count;
 };

+// Values for DT_SCE_MODULE_ATTR
+constexpr u32 MODULE_ATTR_NONE = 0x00;
+constexpr u32 MODULE_ATTR_SCE_CANT_STOP = 0x01;
+constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_LOAD = 0x02;
+constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_START = 0x04;
+constexpr u32 MODULE_ATTR_SCE_CAN_RESTART = 0x08;
+constexpr u32 MODULE_ATTR_SCE_CAN_RELOCATE = 0x10;
+constexpr u32 MODULE_ATTR_SCE_CANT_SHARE = 0x20;
+
+// Values for DT_SCE_IMPORT_LIB_ATTR & DT_SCE_EXPORT_LIB_ATTR
+constexpr u32 LIB_ATTR_AUTO_EXPORT = 0x01;
+constexpr u32 LIB_ATTR_WEAK_EXPORT = 0x02;
+constexpr u32 LIB_ATTR_LOOSE_IMPORT = 0x08;
+
 namespace Core::Loader {

 class Elf {
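As the new LoadDynamicInfo() logging further down decodes them, a DT_SCE_IMPORT_LIB_ATTR or DT_SCE_EXPORT_LIB_ATTR d_un.d_val carries a library ID in its upper bits and the attribute flags in its low 32 bits. A minimal sketch of that decomposition (the sample value is hypothetical):

    #include <cstdint>
    #include <cstdio>

    constexpr std::uint32_t LIB_ATTR_AUTO_EXPORT = 0x01;  // mirrors the constants above
    constexpr std::uint32_t LIB_ATTR_LOOSE_IMPORT = 0x08;

    int main() {
        const std::uint64_t d_val = 0x0001000000000009ULL; // hypothetical dynamic-entry value
        const std::uint64_t library_id = d_val >> 48;      // same shift the linker logging uses
        const std::uint32_t attrs = static_cast<std::uint32_t>(d_val & 0xFFFFFFFF);
        std::printf("id=%llu auto_export=%d loose_import=%d\n",
                    static_cast<unsigned long long>(library_id),
                    (attrs & LIB_ATTR_AUTO_EXPORT) != 0,
                    (attrs & LIB_ATTR_LOOSE_IMPORT) != 0);
    }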
@@ -215,6 +215,37 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
             }
             break;
         }
+        case PT_SCE_LIBVERSION:
+            // Contains a list of used libraries and binary versions in the format:
+            // u8 size; name[size-5]; ':'; u32 version
+            LOG_INFO(Core_Linker, "PT_SCE_LIBVERSION unused blob: offset = {:#x}, size = {}",
+                     elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
+            break;
+        case PT_SCE_MODULE_PARAM:
+            // Contains an unknown struct whose first u32 is its size; currently unused.
+            LOG_INFO(Core_Linker, "PT_SCE_MODULE_PARAM unused blob: offset = {:#x}, size = {}",
+                     elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
+            break;
+        case PT_SCE_COMMENT:
+            // Contains the path to the compiled executable; unused.
+            LOG_INFO(Core_Linker, "PT_SCE_COMMENT unused blob: offset = {:#x}, size = {}",
+                     elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
+            break;
+        case PT_INTERP: {
+            std::vector<char> interpreter_path(elf_pheader[i].p_filesz);
+            const VAddr segment_addr = std::bit_cast<VAddr>(interpreter_path.data());
+            elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
+            LOG_INFO(Core_Linker, "Interpreter: {}", interpreter_path.data());
+            break;
+        }
+        case PT_NOTE:
+            if (elf_pheader[i].p_offset && elf_pheader[i].p_filesz) {
+                std::vector<char> note(elf_pheader[i].p_filesz);
+                const VAddr segment_addr = std::bit_cast<VAddr>(note.data());
+                elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz);
+                LOG_INFO(Core_Linker, "note: {}", note.data());
+            }
+            break;
         default:
             LOG_ERROR(Core_Linker, "Unimplemented type {}", header_type);
         }
@@ -232,6 +263,46 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
     }
 }

+static std::string_view ModuleAttrString(u32 module_attr) {
+    switch (module_attr) {
+    case MODULE_ATTR_NONE:
+        return "NONE";
+    case MODULE_ATTR_SCE_CANT_STOP:
+        return "SCE_CANT_STOP";
+    case MODULE_ATTR_SCE_EXCLUSIVE_LOAD:
+        return "SCE_EXCLUSIVE_LOAD";
+    case MODULE_ATTR_SCE_EXCLUSIVE_START:
+        return "SCE_EXCLUSIVE_START";
+    case MODULE_ATTR_SCE_CAN_RESTART:
+        return "SCE_CAN_RESTART";
+    case MODULE_ATTR_SCE_CAN_RELOCATE:
+        return "SCE_CAN_RELOCATE";
+    case MODULE_ATTR_SCE_CANT_SHARE:
+        return "SCE_CANT_SHARE";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+static std::string LibAttrString(u32 lib_attr) {
+    std::vector<std::string> strings;
+
+    if (lib_attr & LIB_ATTR_AUTO_EXPORT)
+        strings.push_back("AUTO_EXPORT");
+    if (lib_attr & LIB_ATTR_WEAK_EXPORT)
+        strings.push_back("WEAK_EXPORT");
+    if (lib_attr & LIB_ATTR_LOOSE_IMPORT)
+        strings.push_back("LOOSE_IMPORT");
+
+    std::string result;
+    for (const auto& str : strings) {
+        if (!result.empty())
+            result += "|";
+        result += str;
+    }
+    return result;
+}
+
 void Module::LoadDynamicInfo() {
     for (const auto* dyn = reinterpret_cast<elf_dynamic*>(m_dynamic.data()); dyn->d_tag != DT_NULL;
          dyn++) {
@@ -326,7 +397,8 @@ void Module::LoadDynamicInfo() {
         dynamic_info.flags = dyn->d_un.d_val;
         // This value should always be DF_TEXTREL (0x04)
         if (dynamic_info.flags != 0x04) {
-            LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check!");
+            LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check! Current: {:#x}",
+                        dynamic_info.flags);
         }
         break;
     case DT_NEEDED:
@@ -338,6 +410,14 @@ void Module::LoadDynamicInfo() {
             LOG_ERROR(Core_Linker, "DT_NEEDED str table is not loaded should check!");
         }
         break;
+    case DT_SONAME:
+        if (dynamic_info.str_table) {
+            LOG_INFO(Core_Linker, "DT_SONAME value = {}",
+                     dynamic_info.str_table + dyn->d_un.d_val);
+        } else {
+            LOG_ERROR(Core_Linker, "DT_SONAME str table is not loaded should check!");
+        }
+        break;
     case DT_SCE_NEEDED_MODULE: {
         ModuleInfo& info = dynamic_info.import_modules.emplace_back();
         info.value = dyn->d_un.d_val;
@@ -357,14 +437,18 @@ void Module::LoadDynamicInfo() {
         // the given app. How exactly this is generated isn't known, however it is not necessary
         // to have a valid fingerprint. While an invalid fingerprint will cause a warning to be
         // printed to the kernel log, the ELF will still load and run.
-        LOG_INFO(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val);
+        LOG_DEBUG(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val);
         std::memcpy(info.fingerprint.data(), &dyn->d_un.d_val, sizeof(SCE_DBG_NUM_FINGERPRINT));
         break;
     case DT_SCE_IMPORT_LIB_ATTR:
-        // The upper 32-bits should contain the module index multiplied by 0x10000. The lower
-        // 32-bits should be a constant 0x9.
-        LOG_INFO(Core_Linker, "unsupported DT_SCE_IMPORT_LIB_ATTR value = ......: {:#018x}",
-                 dyn->d_un.d_val);
+        // 32-bits library attributes flag.
+        LOG_DEBUG(Core_Linker, "DT_SCE_IMPORT_LIB_ATTR Library ID: {:#02x}, value = {}",
+                  dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
+        break;
+    case DT_SCE_EXPORT_LIB_ATTR:
+        LOG_DEBUG(Core_Linker, "DT_SCE_EXPORT_LIB_ATTR Library ID: {:#02x}, value = {}",
+                  dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
         break;
     case DT_SCE_ORIGINAL_FILENAME:
         dynamic_info.filename = dynamic_info.str_table + dyn->d_un.d_val;
@@ -379,8 +463,8 @@ void Module::LoadDynamicInfo() {
         break;
     };
     case DT_SCE_MODULE_ATTR:
-        LOG_INFO(Core_Linker, "unsupported DT_SCE_MODULE_ATTR value = ..........: {:#018x}",
-                 dyn->d_un.d_val);
+        LOG_INFO(Core_Linker, "DT_SCE_MODULE_ATTR value = {}",
+                 ModuleAttrString(dyn->d_un.d_val & 0xFFFFFFFF));
         break;
     case DT_SCE_EXPORT_LIB: {
         LibraryInfo& info = dynamic_info.export_libs.emplace_back();

@@ -389,6 +473,10 @@ void Module::LoadDynamicInfo() {
         info.enc_id = EncodeId(info.id);
         break;
     }
+    case DT_STRSZ:
+        LOG_INFO(Core_Linker, "unsupported DT_STRSZ value = ..........: {:#018x}",
+                 dyn->d_un.d_val);
+        break;
     default:
         LOG_INFO(Core_Linker, "unsupported dynamic tag ..........: {:#018x}", dyn->d_tag);
     }
@@ -266,7 +266,7 @@ void Emulator::Run(const std::filesystem::path& file) {
 }

 void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) {
-    constexpr std::array<SysModules, 10> ModulesToLoad{
+    constexpr std::array<SysModules, 13> ModulesToLoad{
         {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
          {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber},
          {"libSceUlt.sprx", nullptr},

@@ -276,7 +276,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string
          {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
          {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
          {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc},
-         {"libSceCesCs.sprx", nullptr}}};
+         {"libSceCesCs.sprx", nullptr},
+         {"libSceFont.sprx", nullptr},
+         {"libSceFontFt.sprx", nullptr},
+         {"libSceFreeTypeOt.sprx", nullptr}}};

     std::vector<std::filesystem::path> found_modules;
     const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir);
@@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }

+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b));
+}
+
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b));
+}
+
 Id EmitFPNeg16(EmitContext& ctx, Id value) {
     return ctx.OpFNegate(ctx.F16[1], value);
 }

@@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
     return ctx.OpTrunc(ctx.F64[1], value);
 }

-Id EmitFPFract(EmitContext& ctx, Id value) {
+Id EmitFPFract32(EmitContext& ctx, Id value) {
     return ctx.OpFract(ctx.F32[1], value);
 }

+Id EmitFPFract64(EmitContext& ctx, Id value) {
+    return ctx.OpFract(ctx.F64[1], value);
+}
+
+Id EmitFPFrexpSig32(EmitContext& ctx, Id value) {
+    const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
+    return ctx.OpCompositeExtract(ctx.F32[1], frexp, 0);
+}
+
+Id EmitFPFrexpSig64(EmitContext& ctx, Id value) {
+    const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
+    return ctx.OpCompositeExtract(ctx.F64[1], frexp, 0);
+}
+
+Id EmitFPFrexpExp32(EmitContext& ctx, Id value) {
+    const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value);
+    return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1);
+}
+
+Id EmitFPFrexpExp64(EmitContext& ctx, Id value) {
+    const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value);
+    return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1);
+}
+
 Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs);
 }
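The FrexpStruct emission above follows C's frexp decomposition: value == significand * 2^exponent, with the significand's magnitude in [0.5, 1). A quick host-side check of that identity:

    #include <cmath>
    #include <cstdio>

    int main() {
        int exp = 0;
        // std::frexp splits a value exactly like the SPIR-V FrexpStruct result:
        // component 0 is the significand, component 1 the exponent.
        const double sig = std::frexp(48.0, &exp);
        std::printf("48.0 = %g * 2^%d\n", sig, exp); // prints 48.0 = 0.75 * 2^6
    }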
@@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
 Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPNeg16(EmitContext& ctx, Id value);
 Id EmitFPNeg32(EmitContext& ctx, Id value);
 Id EmitFPNeg64(EmitContext& ctx, Id value);

@@ -220,7 +222,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
 Id EmitFPTrunc16(EmitContext& ctx, Id value);
 Id EmitFPTrunc32(EmitContext& ctx, Id value);
 Id EmitFPTrunc64(EmitContext& ctx, Id value);
-Id EmitFPFract(EmitContext& ctx, Id value);
+Id EmitFPFract32(EmitContext& ctx, Id value);
+Id EmitFPFract64(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
 Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
@@ -147,6 +147,10 @@ void EmitContext::DefineArithmeticTypes() {

     full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
     full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
+    frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32");
+    if (info.uses_fp64) {
+        frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64");
+    }
 }

 void EmitContext::DefineInterfaces() {
@@ -148,6 +148,8 @@ public:

     Id full_result_i32x2;
     Id full_result_u32x2;
+    Id frexp_result_f32;
+    Id frexp_result_f64;

     Id pi_x2;
@@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) {

     const auto& exp = inst.control.exp;
     const IR::Attribute attrib{exp.target};
+    if (attrib == IR::Attribute::Depth && exp.en != 1) {
+        LOG_WARNING(Render_Vulkan, "Unsupported depth export");
+        return;
+    }
+
     const std::array vsrc = {
         IR::VectorReg(inst.src[0].code),
         IR::VectorReg(inst.src[1].code),
@@ -200,6 +200,11 @@ public:
     void V_BFREV_B32(const GcnInst& inst);
     void V_FFBH_U32(const GcnInst& inst);
     void V_FFBL_B32(const GcnInst& inst);
+    void V_FREXP_EXP_I32_F64(const GcnInst& inst);
+    void V_FREXP_MANT_F64(const GcnInst& inst);
+    void V_FRACT_F64(const GcnInst& inst);
+    void V_FREXP_EXP_I32_F32(const GcnInst& inst);
+    void V_FREXP_MANT_F32(const GcnInst& inst);
     void V_MOVRELD_B32(const GcnInst& inst);
     void V_MOVRELS_B32(const GcnInst& inst);
     void V_MOVRELSD_B32(const GcnInst& inst);
@@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_FFBH_U32(inst);
     case Opcode::V_FFBL_B32:
         return V_FFBL_B32(inst);
+    case Opcode::V_FREXP_EXP_I32_F64:
+        return V_FREXP_EXP_I32_F64(inst);
+    case Opcode::V_FREXP_MANT_F64:
+        return V_FREXP_MANT_F64(inst);
+    case Opcode::V_FRACT_F64:
+        return V_FRACT_F64(inst);
+    case Opcode::V_FREXP_EXP_I32_F32:
+        return V_FREXP_EXP_I32_F32(inst);
+    case Opcode::V_FREXP_MANT_F32:
+        return V_FREXP_MANT_F32(inst);
     case Opcode::V_MOVRELD_B32:
         return V_MOVRELD_B32(inst);
     case Opcode::V_MOVRELS_B32:
@@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {

 void Translator::V_FRACT_F32(const GcnInst& inst) {
     const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
-    SetDst(inst.dst[0], ir.Fract(src0));
+    SetDst(inst.dst[0], ir.FPFract(src0));
 }

 void Translator::V_TRUNC_F32(const GcnInst& inst) {
@@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FindILsb(src0));
 }

+void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) {
+    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
+    SetDst(inst.dst[0], ir.FPFrexpExp(src0));
+}
+
+void Translator::V_FREXP_MANT_F64(const GcnInst& inst) {
+    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
+    SetDst64(inst.dst[0], ir.FPFrexpSig(src0));
+}
+
+void Translator::V_FRACT_F64(const GcnInst& inst) {
+    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
+    SetDst64(inst.dst[0], ir.FPFract(src0));
+}
+
+void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    SetDst(inst.dst[0], ir.FPFrexpExp(src0));
+}
+
+void Translator::V_FREXP_MANT_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    SetDst(inst.dst[0], ir.FPFrexpSig(src0));
+}
+
 void Translator::V_MOVRELD_B32(const GcnInst& inst) {
     const IR::U32 src_val{GetSrc(inst.src[0])};
     u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
@@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
     info.has_offset.Assign(flags.test(MimgModifier::Offset));
     info.has_lod.Assign(flags.any(MimgModifier::Lod));
     info.is_array.Assign(mimg.da);
+    info.is_unnormalized.Assign(mimg.unrm);

     if (gather) {
         info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
@@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
     }
 }

+F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::F32:
+        return Inst<F32>(Opcode::FPDiv32, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPDiv64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
     if (a.Type() != b.Type() || a.Type() != c.Type()) {
         UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
@@ -855,8 +869,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
     }
 }

-F32 IREmitter::Fract(const F32& value) {
-    return Inst<F32>(Opcode::FPFract, value);
+F32F64 IREmitter::FPFract(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<F32>(Opcode::FPFract32, value);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPFract64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+F32F64 IREmitter::FPFrexpSig(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<F32>(Opcode::FPFrexpSig32, value);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPFrexpSig64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32 IREmitter::FPFrexpExp(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<U32>(Opcode::FPFrexpExp32, value);
+    case Type::F64:
+        return Inst<U32>(Opcode::FPFrexpExp64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }

 U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
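FPFract keeps the usual fract(x) = x - floor(x) identity that OpFract implements, which is non-negative even for negative inputs. A host-side check:

    #include <cmath>
    #include <cstdio>

    double Fract(double x) {
        return x - std::floor(x); // the identity OpFract implements
    }

    int main() {
        std::printf("%g %g\n", Fract(3.75), Fract(-0.25)); // 0.75 0.75
    }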
@@ -158,6 +158,7 @@ public:
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
+    [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);

     [[nodiscard]] F32F64 FPAbs(const F32F64& value);

@@ -179,7 +180,9 @@ public:
     [[nodiscard]] F32F64 FPFloor(const F32F64& value);
     [[nodiscard]] F32F64 FPCeil(const F32F64& value);
     [[nodiscard]] F32F64 FPTrunc(const F32F64& value);
-    [[nodiscard]] F32 Fract(const F32& value);
+    [[nodiscard]] F32F64 FPFract(const F32F64& value);
+    [[nodiscard]] F32F64 FPFrexpSig(const F32F64& value);
+    [[nodiscard]] U32 FPFrexpExp(const F32F64& value);

     [[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
@@ -184,6 +184,8 @@ OPCODE(FPMin32, F32, F32, F32, )
 OPCODE(FPMin64,          F64,  F64,  F64, )
 OPCODE(FPMul32,          F32,  F32,  F32, )
 OPCODE(FPMul64,          F64,  F64,  F64, )
+OPCODE(FPDiv32,          F32,  F32,  F32, )
+OPCODE(FPDiv64,          F64,  F64,  F64, )
 OPCODE(FPNeg32,          F32,  F32,  )
 OPCODE(FPNeg64,          F64,  F64,  )
 OPCODE(FPRecip32,        F32,  F32,  )

@@ -208,7 +210,12 @@ OPCODE(FPCeil32, F32, F32, )
 OPCODE(FPCeil64,         F64,  F64,  )
 OPCODE(FPTrunc32,        F32,  F32,  )
 OPCODE(FPTrunc64,        F64,  F64,  )
-OPCODE(FPFract,          F32,  F32,  )
+OPCODE(FPFract32,        F32,  F32,  )
+OPCODE(FPFract64,        F64,  F64,  )
+OPCODE(FPFrexpSig32,     F32,  F32,  )
+OPCODE(FPFrexpSig64,     F64,  F64,  )
+OPCODE(FPFrexpExp32,     U32,  F32,  )
+OPCODE(FPFrexpExp64,     U32,  F64,  )

 OPCODE(FPOrdEqual32,     U1,   F32,  F32, )
 OPCODE(FPOrdEqual64,     U1,   F64,  F64, )
@@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) {
     }
 }

+IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
+    boost::container::static_vector<IR::Value, 4> comps;
+    for (u32 i = 0; i < 4; i++) {
+        switch (sharp.GetSwizzle(i)) {
+        case AmdGpu::CompSwizzle::Zero:
+            comps.emplace_back(ir.Imm32(0.f));
+            break;
+        case AmdGpu::CompSwizzle::One:
+            comps.emplace_back(ir.Imm32(1.f));
+            break;
+        case AmdGpu::CompSwizzle::Red:
+            comps.emplace_back(ir.CompositeExtract(texel, 0));
+            break;
+        case AmdGpu::CompSwizzle::Green:
+            comps.emplace_back(ir.CompositeExtract(texel, 1));
+            break;
+        case AmdGpu::CompSwizzle::Blue:
+            comps.emplace_back(ir.CompositeExtract(texel, 2));
+            break;
+        case AmdGpu::CompSwizzle::Alpha:
+            comps.emplace_back(ir.CompositeExtract(texel, 3));
+            break;
+        default:
+            UNREACHABLE();
+        }
+    }
+    return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
+};
+
 class Descriptors {
 public:
     explicit Descriptors(Info& info_)
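For reference, a plain host-side illustration of the per-component dst_sel selection that SwizzleVector builds in IR (the local enum and sample texel are hypothetical, for illustration only):

    #include <array>
    #include <cstdio>

    enum class CompSwizzle { Zero, One, Red, Green, Blue, Alpha };

    std::array<float, 4> Swizzle(const std::array<float, 4>& texel,
                                 const std::array<CompSwizzle, 4>& sel) {
        std::array<float, 4> out{};
        for (int i = 0; i < 4; i++) {
            switch (sel[i]) {
            case CompSwizzle::Zero: out[i] = 0.f; break;
            case CompSwizzle::One:  out[i] = 1.f; break;
            default: // Red..Alpha map to texel components 0..3 in this local enum
                out[i] = texel[static_cast<int>(sel[i]) - 2];
            }
        }
        return out;
    }

    int main() {
        // A BGRA dst_sel swaps the red and blue channels of an RGBA texel.
        const auto out = Swizzle({0.1f, 0.2f, 0.3f, 1.0f},
                                 {CompSwizzle::Blue, CompSwizzle::Green,
                                  CompSwizzle::Red, CompSwizzle::Alpha});
        std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 0.3 0.2 0.1 1
    }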
@@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     inst.SetArg(0, ir.Imm32(binding));
     ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
+
+    // Apply dst_sel swizzle on formatted buffer instructions
+    if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
+        inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
+    } else {
+        const auto inst_info = inst.Flags<IR::BufferInstInfo>();
+        const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
+        inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
+    }
 }

 IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
@@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                  Descriptors& descriptors, const IR::Inst* producer,
                                  const u32 image_binding, const AmdGpu::Image& image) {
     // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
-    const u32 sampler_binding = [&] {
+    const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
         ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
         const IR::Value& handle = producer->Arg(1);
         // Inline sampler resource.
         if (handle.IsImmediate()) {
             LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-            return descriptors.Add(SamplerResource{
+            const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
+            const auto binding = descriptors.Add(SamplerResource{
                 .sharp_idx = std::numeric_limits<u32>::max(),
-                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
+                .inline_sampler = inline_sampler,
             });
+            return {binding, inline_sampler};
         }
         // Normal sampler resource.
         const auto ssharp_handle = handle.InstRecursive();
         const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
         const auto ssharp = TrackSharp(ssharp_ud, info);
-        return descriptors.Add(SamplerResource{
+        const auto binding = descriptors.Add(SamplerResource{
             .sharp_idx = ssharp,
             .associated_image = image_binding,
             .disable_aniso = disable_aniso,
         });
+        return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
     }();

     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -539,28 +580,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         }
     }();

+    const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
+    // Query dimensions of image if needed for normalization.
+    // We can't use the image sharp because it could be bound to a different image later.
+    const auto dimensions =
+        unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
+                     : IR::Value{};
+    const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
+        const auto coord = get_addr_reg(idx);
+        if (unnormalized) {
+            // Normalize the coordinate for sampling, dividing by its corresponding dimension.
+            return ir.FPDiv(coord,
+                            ir.BitCast<IR::F32>(IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
+        }
+        return coord;
+    };
+
     // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
     const IR::Value coords = [&] -> IR::Value {
         switch (image.GetType()) {
         case AmdGpu::ImageType::Color1D: // x
             addr_reg = addr_reg + 1;
-            return get_addr_reg(addr_reg - 1);
+            return get_coord(addr_reg - 1, 0);
         case AmdGpu::ImageType::Color1DArray: // x, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2D: // x, y
             addr_reg = addr_reg + 2;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
         case AmdGpu::ImageType::Color2DArray: // x, y, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
-            [[fallthrough]];
+            addr_reg = addr_reg + 3;
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_addr_reg(addr_reg - 1));
         case AmdGpu::ImageType::Color3D: // x, y, z
             addr_reg = addr_reg + 3;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
-                                         get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_coord(addr_reg - 1, 2));
         case AmdGpu::ImageType::Cube: // x, y, face
             addr_reg = addr_reg + 3;
-            return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+            return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
                                   get_addr_reg(addr_reg - 1), false, inst_info.is_array);
         default:
             UNREACHABLE();
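The normalization inserted above is plain texel-to-[0, 1] scaling; a host-side illustration (the function name is hypothetical):

    #include <cstdio>

    // An unnormalized coordinate in texels is divided by the image dimension to
    // give the [0, 1] coordinate that Vulkan samplers expect.
    float NormalizeCoord(float texel_coord, unsigned dimension) {
        return texel_coord / static_cast<float>(dimension);
    }

    int main() {
        // Sampling texel x = 384 of a 512-texel-wide image -> u = 0.75.
        std::printf("u = %g\n", NormalizeCoord(384.0f, 512));
    }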
@@ -711,6 +770,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     }();
     inst.SetArg(1, coords);

+    if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
+        inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2)));
+    }
+
     if (inst_info.has_lod) {
         ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
         ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
@@ -40,7 +40,8 @@ union TextureInstInfo {
     BitField<6, 2, u32> gather_comp;
     BitField<8, 1, u32> has_derivatives;
     BitField<9, 1, u32> is_array;
-    BitField<10, 1, u32> is_gather;
+    BitField<10, 1, u32> is_unnormalized;
+    BitField<11, 1, u32> is_gather;
 };

 union BufferInstInfo {
@@ -31,6 +31,7 @@ struct BufferSpecialization {

 struct TextureBufferSpecialization {
     bool is_integer = false;
+    u32 dst_select = 0;

     auto operator<=>(const TextureBufferSpecialization&) const = default;
 };

@@ -38,8 +39,12 @@ struct TextureBufferSpecialization {
 struct ImageSpecialization {
     AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
     bool is_integer = false;
+    u32 dst_select = 0;

-    auto operator<=>(const ImageSpecialization&) const = default;
+    bool operator==(const ImageSpecialization& other) const {
+        return type == other.type && is_integer == other.is_integer &&
+               (dst_select != 0 ? dst_select == other.dst_select : true);
+    }
 };

 struct FMaskSpecialization {

@@ -49,6 +54,12 @@ struct FMaskSpecialization {
     auto operator<=>(const FMaskSpecialization&) const = default;
 };

+struct SamplerSpecialization {
+    bool force_unnormalized = false;
+
+    auto operator<=>(const SamplerSpecialization&) const = default;
+};
+
 /**
  * Alongside runtime information, this structure also checks bound resources
  * for compatibility. Can be used as a key for storing shader permutations.

@@ -67,6 +78,7 @@ struct StageSpecialization {
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
     boost::container::small_vector<ImageSpecialization, 16> images;
     boost::container::small_vector<FMaskSpecialization, 8> fmasks;
+    boost::container::small_vector<SamplerSpecialization, 16> samplers;
     Backend::Bindings start{};

     explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,

@@ -96,17 +108,25 @@ struct StageSpecialization {
         ForEachSharp(binding, tex_buffers, info->texture_buffers,
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
+                         spec.dst_select = sharp.DstSelect();
                      });
         ForEachSharp(binding, images, info->images,
                      [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
                          spec.type = sharp.GetBoundType();
                          spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
+                         if (desc.is_storage) {
+                             spec.dst_select = sharp.DstSelect();
+                         }
                      });
         ForEachSharp(binding, fmasks, info->fmasks,
                      [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
                          spec.width = sharp.width;
                          spec.height = sharp.height;
                      });
+        ForEachSharp(samplers, info->samplers,
+                     [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
+                         spec.force_unnormalized = sharp.force_unnormalized;
+                     });
     }

     void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {

@@ -175,6 +195,11 @@ struct StageSpecialization {
                 return false;
             }
         }
+        for (u32 i = 0; i < samplers.size(); i++) {
+            if (samplers[i] != other.samplers[i]) {
+                return false;
+            }
+        }
         return true;
     }
 };
@@ -431,6 +431,10 @@ struct Liverpool {
             return u64(z_read_base) << 8;
         }

+        u64 StencilAddress() const {
+            return u64(stencil_read_base) << 8;
+        }
+
         u32 NumSamples() const {
             return 1u << z_info.num_samples; // spec doesn't say it is a log2
         }
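Like DepthAddress() beside it, StencilAddress() treats the register field as a 256-byte-aligned base, so the byte address is the field value shifted left by 8. A quick check (the register value is hypothetical):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t stencil_read_base = 0x00F00000; // hypothetical register value
        const std::uint64_t addr = static_cast<std::uint64_t>(stencil_read_base) << 8;
        std::printf("%#llx\n", static_cast<unsigned long long>(addr)); // 0xf0000000
    }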
@@ -52,6 +52,10 @@ struct Buffer {
         return std::memcmp(this, &other, sizeof(Buffer)) == 0;
     }

+    u32 DstSelect() const {
+        return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
+    }
+
     CompSwizzle GetSwizzle(u32 comp) const noexcept {
         const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
         return static_cast<CompSwizzle>(select[comp]);

@@ -204,6 +208,11 @@ struct Image {
         return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
     }

+    CompSwizzle GetSwizzle(u32 comp) const noexcept {
+        const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
+        return static_cast<CompSwizzle>(select[comp]);
+    }
+
     static char SelectComp(u32 sel) {
         switch (sel) {
         case 0:
@@ -699,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
         default:
             break;
         }
-    } else if (comp_swap_reverse) {
-        switch (base_format) {
-        case vk::Format::eR8G8B8A8Unorm:
-            return vk::Format::eA8B8G8R8UnormPack32;
-        case vk::Format::eR8G8B8A8Srgb:
-            return vk::Format::eA8B8G8R8SrgbPack32;
-        default:
-            break;
-        }
     }
     return base_format;
 }
@@ -616,18 +616,24 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
         auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
                                                              std::tuple{tsharp, image_desc});
         image_id = texture_cache.FindImage(desc);
-        auto& image = texture_cache.GetImage(image_id);
-        if (image.binding.is_bound) {
+        auto* image = &texture_cache.GetImage(image_id);
+        if (image->depth_id) {
+            // If this image has an associated depth image, it's a stencil attachment.
+            // Redirect the access to the actual depth-stencil buffer.
+            image_id = image->depth_id;
+            image = &texture_cache.GetImage(image_id);
+        }
+        if (image->binding.is_bound) {
             // The image is already bound. In case if it is about to be used as storage we need
             // to force general layout on it.
-            image.binding.force_general |= image_desc.is_storage;
+            image->binding.force_general |= image_desc.is_storage;
         }
-        if (image.binding.is_target) {
+        if (image->binding.is_target) {
             // The image is already bound as target. Since we read and output to it need to force
             // general layout too.
-            image.binding.force_general = 1u;
+            image->binding.force_general = 1u;
         }
-        image.binding.is_bound = 1u;
+        image->binding.is_bound = 1u;
     }

     // Second pass to re-bind images that were updated after binding
@@ -145,8 +145,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
              const ImageInfo& info_)
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()} {
+    if (info.pixel_format == vk::Format::eUndefined) {
+        return;
+    }
     mip_hashes.resize(info.resources.levels);
-    ASSERT(info.pixel_format != vk::Format::eUndefined);
     // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
     // the texture cache should re-create the resource with the usage requested
     vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
@@ -92,6 +92,10 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }

+    void AssociateDepth(ImageId image_id) {
+        depth_id = image_id;
+    }
+
     boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
         vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
         vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);

@@ -116,6 +120,7 @@ struct Image {
     VAddr track_addr_end = 0;
     std::vector<ImageViewInfo> image_view_infos;
     std::vector<ImageViewId> image_view_ids;
+    ImageId depth_id{};

     // Resource state tracking
     struct {
@@ -298,6 +298,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
     resources.layers = num_slices;
     meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;

+    stencil_addr = buffer.StencilAddress();
+    stencil_size = pitch * size.height * sizeof(u8);
+
     guest_address = buffer.Address();
     const auto depth_slice_sz = buffer.GetDepthSliceSize();
     guest_size_bytes = depth_slice_sz * num_slices;
@@ -69,7 +69,7 @@ struct ImageInfo {
     } props{}; // Surface properties with impact on various calculation factors

     vk::Format pixel_format = vk::Format::eUndefined;
-    vk::ImageType type = vk::ImageType::e1D;
+    vk::ImageType type = vk::ImageType::e2D;
     SubresourceExtent resources;
     Extent3D size{1, 1, 1};
     u32 num_bits{};
@@ -50,34 +50,6 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
     }
 }

-bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
-    return (num_components == 1 && dst_sel == 0b001'000'000'100) ||
-           (num_components == 2 && dst_sel == 0b001'000'101'100) ||
-           (num_components == 3 && dst_sel == 0b001'110'101'100) ||
-           (num_components == 4 && dst_sel == 0b111'110'101'100);
-}
-
-vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
-    // BGRA
-    if (dst_sel == 0b111100101110) {
-        switch (format) {
-        case vk::Format::eR8G8B8A8Unorm:
-            return vk::Format::eB8G8R8A8Unorm;
-        case vk::Format::eR8G8B8A8Snorm:
-            return vk::Format::eB8G8R8A8Snorm;
-        case vk::Format::eR8G8B8A8Uint:
-            return vk::Format::eB8G8R8A8Uint;
-        case vk::Format::eR8G8B8A8Sint:
-            return vk::Format::eB8G8R8A8Sint;
-        case vk::Format::eR8G8B8A8Srgb:
-            return vk::Format::eB8G8R8A8Srgb;
-        default:
-            break;
-        }
-    }
-    return format;
-}
-
 ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept
     : is_storage{desc.is_storage} {
     const auto dfmt = image.GetDataFmt();

@@ -120,17 +92,6 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
         mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
         mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
     }
-    // Check for unfortunate case of storage images being swizzled
-    const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
-    const u32 dst_sel = image.DstSelect();
-    if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
-        if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
-            format = new_format;
-            return;
-        }
-        LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps,
-                  image.DstSelectName());
-    }
 }

 ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept {

@@ -170,7 +131,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
         format = image.info.pixel_format;
         aspect = vk::ImageAspectFlagBits::eDepth;
     }
-    if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) {
+    if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) {
         format = image.info.pixel_format;
         aspect = vk::ImageAspectFlagBits::eStencil;
     }
@@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
         .minLod = sampler.MinLod(),
         .maxLod = sampler.MaxLod(),
         .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
-        .unnormalizedCoordinates = bool(sampler.force_unnormalized),
+        .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
     };
     auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);
     ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}",
@@ -443,6 +443,27 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
         }
     }

+    // If there is a stencil attachment, link depth and stencil.
+    if (desc.info.stencil_addr != 0) {
+        ImageId stencil_id{};
+        ForEachImageInRegion(desc.info.stencil_addr, desc.info.stencil_size,
+                             [&](ImageId image_id, Image& image) {
+                                 if (image.info.guest_address == desc.info.stencil_addr) {
+                                     stencil_id = image_id;
+                                 }
+                             });
+        if (!stencil_id) {
+            ImageInfo info{};
+            info.guest_address = desc.info.stencil_addr;
+            info.guest_size_bytes = desc.info.stencil_size;
+            info.size = desc.info.size;
+            stencil_id = slot_images.insert(instance, scheduler, info);
+            RegisterImage(stencil_id);
+        }
+        Image& image = slot_images[stencil_id];
+        image.AssociateDepth(image_id);
+    }
+
     return RegisterImageView(image_id, desc.view_info);
 }