diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 878c10868..bacfbea0d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,7 +89,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -143,7 +143,7 @@ jobs: arch: amd64 - name: Configure CMake - run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS @@ -201,7 +201,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -265,7 +265,7 @@ jobs: variant: sccache - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) @@ -312,7 +312,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -368,7 +368,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 082be211a..e1e67f235 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -8,6 +8,9 @@ set_directory_properties(PROPERTIES SYSTEM ON ) +# Set CMP0069 policy to "NEW" in order to ensure consistent behavior when building external targets with LTO enabled +set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) + if (MSVC) # Silence "deprecation" warnings add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) diff --git a/externals/sirit b/externals/sirit index 6cecb95d6..e12b6b592 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 6cecb95d679c82c413d1f989e0b7ad9af130600d +Subproject commit e12b6b592ce9917a85303c555259488643c56f47 diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..3dc282fc3 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -23,7 +23,8 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; -constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; +// align to bunch of zeroes, to fit IDA Pro "ps4_module_loader" loader plugin +constexpr VAddr SYSTEM_RESERVED_MIN = 0x0800000000ULL; #if defined(__APPLE__) && defined(ARCH_X86_64) // Can only comfortably reserve the first 0x7C0000000 of system reserved space. constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL; diff --git a/src/core/libraries/audio3d/audio3d.cpp b/src/core/libraries/audio3d/audio3d.cpp index 44670d87b..d896524c6 100644 --- a/src/core/libraries/audio3d/audio3d.cpp +++ b/src/core/libraries/audio3d/audio3d.cpp @@ -80,7 +80,7 @@ int PS4_SYSV_ABI sceAudio3dPortGetAttributesSupported(OrbisAudio3dPortId uiPortI int PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId uiPortId, u32* pQueueLevel, u32* pQueueAvailable) { - LOG_INFO(Lib_Audio3d, "uiPortId = {}", uiPortId); + LOG_TRACE(Lib_Audio3d, "uiPortId = {}", uiPortId); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 18035e6ce..dbf085fb3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -971,7 +971,7 @@ s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() { } void PS4_SYSV_ABI sceGnmFlushGarlic() { - LOG_WARNING(Lib_GnmDriver, "(STUBBED) called"); + LOG_TRACE(Lib_GnmDriver, "(STUBBED) called"); } int PS4_SYSV_ABI sceGnmGetCoredumpAddress() { diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 6c29d9305..97cc01ebc 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -50,6 +50,9 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg return handle; } handle = linker->LoadModule(path, true); + if (handle == -1) { + return ORBIS_KERNEL_ERROR_ESRCH; + } auto* module = linker->GetModule(handle); linker->RelocateAnyImports(module); diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index eb6046c7a..8453a78b9 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/assert.h" #include "common/logging/log.h" @@ -65,6 +66,15 @@ char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t cou return std::strncpy(dest, src, count); } +int PS4_SYSV_ABI internal_strncpy_s(char* dest, size_t destsz, const char* src, size_t count) { +#ifdef _WIN64 + return strncpy_s(dest, destsz, src, count); +#else + std::strcpy(dest, src); + return 0; +#endif +} + char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) { return std::strcat(dest, src); } @@ -237,6 +247,8 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { internal_strlen); LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncpy); + LIB_FUNCTION("YNzNkJzYqEg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strncpy_s); LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strcat); LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index 1dc0c60a1..f2cdb8e05 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -347,8 +347,10 @@ struct elf_program_id_header { constexpr s64 DT_NULL = 0; constexpr s64 DT_NEEDED = 0x00000001; constexpr s64 DT_RELA = 0x00000007; +constexpr s64 DT_STRSZ = 0x00000009; constexpr s64 DT_INIT = 0x0000000c; constexpr s64 DT_FINI = 0x0000000d; +constexpr s64 DT_SONAME = 0x0000000e; constexpr s64 DT_DEBUG = 0x00000015; constexpr s64 DT_TEXTREL = 0x00000016; constexpr s64 DT_INIT_ARRAY = 0x00000019; @@ -365,6 +367,7 @@ constexpr s64 DT_SCE_NEEDED_MODULE = 0x6100000f; constexpr s64 DT_SCE_MODULE_ATTR = 0x61000011; constexpr s64 DT_SCE_EXPORT_LIB = 0x61000013; constexpr s64 DT_SCE_IMPORT_LIB = 0x61000015; +constexpr s64 DT_SCE_EXPORT_LIB_ATTR = 0x61000017; constexpr s64 DT_SCE_IMPORT_LIB_ATTR = 0x61000019; constexpr s64 DT_SCE_HASH = 0x61000025; constexpr s64 DT_SCE_PLTGOT = 0x61000027; @@ -458,6 +461,20 @@ struct eh_frame_hdr { uint32_t fde_count; }; +// Values for DT_SCE_MODULE_ATTR +constexpr u32 MODULE_ATTR_NONE = 0x00; +constexpr u32 MODULE_ATTR_SCE_CANT_STOP = 0x01; +constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_LOAD = 0x02; +constexpr u32 MODULE_ATTR_SCE_EXCLUSIVE_START = 0x04; +constexpr u32 MODULE_ATTR_SCE_CAN_RESTART = 0x08; +constexpr u32 MODULE_ATTR_SCE_CAN_RELOCATE = 0x10; +constexpr u32 MODULE_ATTR_SCE_CANT_SHARE = 0x20; + +// Values for DT_SCE_IMPORT_LIB_ATTR & DT_SCE_EXPORT_LIB_ATTR +constexpr u32 LIB_ATTR_AUTO_EXPORT = 0x01; +constexpr u32 LIB_ATTR_WEAK_EXPORT = 0x02; +constexpr u32 LIB_ATTR_LOOSE_IMPORT = 0x08; + namespace Core::Loader { class Elf { diff --git a/src/core/module.cpp b/src/core/module.cpp index 70afb932c..5e615bd7a 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -215,6 +215,37 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { } break; } + case PT_SCE_LIBVERSION: + // contains list of used libraries and binary version in format: + // u8 size; name[size-5]; ':'; u32 version + LOG_INFO(Core_Linker, "PT_SCE_LIBVERSION unused blob: offset = {:#x}, size = {}", + elf_pheader[i].p_offset, elf_pheader[i].p_filesz); + break; + case PT_SCE_MODULE_PARAM: + // contains unknown struct, first u32 - sizeof this struct, currently unused + LOG_INFO(Core_Linker, "PT_SCE_MODULE_PARAM unused blob: offset = {:#x}, size = {}", + elf_pheader[i].p_offset, elf_pheader[i].p_filesz); + break; + case PT_SCE_COMMENT: + // Contains path to compiled executable, unused + LOG_INFO(Core_Linker, "PT_SCE_COMMENT unused blob: offset = {:#x}, size = {}", + elf_pheader[i].p_offset, elf_pheader[i].p_filesz); + break; + case PT_INTERP: { + std::vector interpeter_path(elf_pheader[i].p_filesz); + const VAddr segment_addr = std::bit_cast(interpeter_path.data()); + elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz); + LOG_INFO(Core_Linker, "Interpreter: {}", interpeter_path.data()); + break; + } + case PT_NOTE: + if (elf_pheader[i].p_offset && elf_pheader[i].p_filesz) { + std::vector note(elf_pheader[i].p_filesz); + const VAddr segment_addr = std::bit_cast(note.data()); + elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, elf_pheader[i].p_filesz); + LOG_INFO(Core_Linker, "note: {}", note.data()); + } + break; default: LOG_ERROR(Core_Linker, "Unimplemented type {}", header_type); } @@ -232,6 +263,46 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { } } +static std::string_view ModuleAttrString(u32 module_attr) { + switch (module_attr) { + case MODULE_ATTR_NONE: + return "NONE"; + case MODULE_ATTR_SCE_CANT_STOP: + return "SCE_CANT_STOP"; + case MODULE_ATTR_SCE_EXCLUSIVE_LOAD: + return "SCE_EXCLUSIVE_LOAD"; + case MODULE_ATTR_SCE_EXCLUSIVE_START: + return "SCE_EXCLUSIVE_START"; + case MODULE_ATTR_SCE_CAN_RESTART: + return "SCE_CAN_RESTART"; + case MODULE_ATTR_SCE_CAN_RELOCATE: + return "SCE_CAN_RESTART"; + case MODULE_ATTR_SCE_CANT_SHARE: + return "SCE_CANT_SHARE"; + default: + return "UNKNOWN"; + } +} + +static std::string LibAttrString(u32 lib_attr) { + std::vector strings; + + if (lib_attr & LIB_ATTR_AUTO_EXPORT) + strings.push_back("AUTO_EXPORT"); + if (lib_attr & LIB_ATTR_WEAK_EXPORT) + strings.push_back("WEAK_EXPORT"); + if (lib_attr & LIB_ATTR_LOOSE_IMPORT) + strings.push_back("LOOSE_IMPORT"); + + std::string result; + for (const auto& str : strings) { + if (!result.empty()) + result += "|"; + result += str; + } + return result; +} + void Module::LoadDynamicInfo() { for (const auto* dyn = reinterpret_cast(m_dynamic.data()); dyn->d_tag != DT_NULL; dyn++) { @@ -326,7 +397,8 @@ void Module::LoadDynamicInfo() { dynamic_info.flags = dyn->d_un.d_val; // This value should always be DF_TEXTREL (0x04) if (dynamic_info.flags != 0x04) { - LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check!"); + LOG_WARNING(Core_Linker, "DT_FLAGS is NOT 0x04 should check! Current: {:#x}", + dynamic_info.flags); } break; case DT_NEEDED: @@ -338,6 +410,14 @@ void Module::LoadDynamicInfo() { LOG_ERROR(Core_Linker, "DT_NEEDED str table is not loaded should check!"); } break; + case DT_SONAME: + if (dynamic_info.str_table) { + LOG_INFO(Core_Linker, "DT_SONAME value = {}", + dynamic_info.str_table + dyn->d_un.d_val); + } else { + LOG_ERROR(Core_Linker, "DT_SONAME str table is not loaded should check!"); + } + break; case DT_SCE_NEEDED_MODULE: { ModuleInfo& info = dynamic_info.import_modules.emplace_back(); info.value = dyn->d_un.d_val; @@ -357,14 +437,18 @@ void Module::LoadDynamicInfo() { // the given app. How exactly this is generated isn't known, however it is not necessary // to have a valid fingerprint. While an invalid fingerprint will cause a warning to be // printed to the kernel log, the ELF will still load and run. - LOG_INFO(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val); + LOG_DEBUG(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val); std::memcpy(info.fingerprint.data(), &dyn->d_un.d_val, sizeof(SCE_DBG_NUM_FINGERPRINT)); break; case DT_SCE_IMPORT_LIB_ATTR: // The upper 32-bits should contain the module index multiplied by 0x10000. The lower - // 32-bits should be a constant 0x9. - LOG_INFO(Core_Linker, "unsupported DT_SCE_IMPORT_LIB_ATTR value = ......: {:#018x}", - dyn->d_un.d_val); + // 32-bits library attributes flag. + LOG_DEBUG(Core_Linker, "DT_SCE_IMPORT_LIB_ATTR Library ID: {:#02x}, value = {}", + dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF)); + break; + case DT_SCE_EXPORT_LIB_ATTR: + LOG_DEBUG(Core_Linker, "DT_SCE_EXPORT_LIB_ATTR Library ID: {:#02x}, value = {}", + dyn->d_un.d_val >> 48, LibAttrString(dyn->d_un.d_val & 0xFFFFFFFF)); break; case DT_SCE_ORIGINAL_FILENAME: dynamic_info.filename = dynamic_info.str_table + dyn->d_un.d_val; @@ -379,8 +463,8 @@ void Module::LoadDynamicInfo() { break; }; case DT_SCE_MODULE_ATTR: - LOG_INFO(Core_Linker, "unsupported DT_SCE_MODULE_ATTR value = ..........: {:#018x}", - dyn->d_un.d_val); + LOG_INFO(Core_Linker, "DT_SCE_MODULE_ATTR value = {}", + ModuleAttrString(dyn->d_un.d_val & 0xFFFFFFFF)); break; case DT_SCE_EXPORT_LIB: { LibraryInfo& info = dynamic_info.export_libs.emplace_back(); @@ -389,6 +473,10 @@ void Module::LoadDynamicInfo() { info.enc_id = EncodeId(info.id); break; } + case DT_STRSZ: + LOG_INFO(Core_Linker, "unsupported DT_STRSZ value = ..........: {:#018x}", + dyn->d_un.d_val); + break; default: LOG_INFO(Core_Linker, "unsupported dynamic tag ..........: {:#018x}", dyn->d_tag); } diff --git a/src/emulator.cpp b/src/emulator.cpp index eeac5973a..c517bc284 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -266,7 +266,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, @@ -276,7 +276,10 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc}, - {"libSceCesCs.sprx", nullptr}}}; + {"libSceCesCs.sprx", nullptr}, + {"libSceFont.sprx", nullptr}, + {"libSceFontFt.sprx", nullptr}, + {"libSceFreeTypeOt.sprx", nullptr}}}; std::vector found_modules; const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e822eabef..a63be87e2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b)); +} + +Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b)); +} + Id EmitFPNeg16(EmitContext& ctx, Id value) { return ctx.OpFNegate(ctx.F16[1], value); } @@ -217,10 +225,34 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) { return ctx.OpTrunc(ctx.F64[1], value); } -Id EmitFPFract(EmitContext& ctx, Id value) { +Id EmitFPFract32(EmitContext& ctx, Id value) { return ctx.OpFract(ctx.F32[1], value); } +Id EmitFPFract64(EmitContext& ctx, Id value) { + return ctx.OpFract(ctx.F64[1], value); +} + +Id EmitFPFrexpSig32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.F32[1], frexp, 0); +} + +Id EmitFPFrexpSig64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.F64[1], frexp, 0); +} + +Id EmitFPFrexpExp32(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f32, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + +Id EmitFPFrexpExp64(EmitContext& ctx, Id value) { + const auto frexp = ctx.OpFrexpStruct(ctx.frexp_result_f64, value); + return ctx.OpCompositeExtract(ctx.U32[1], frexp, 1); +} + Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index cc3db880c..4ff53670e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPNeg16(EmitContext& ctx, Id value); Id EmitFPNeg32(EmitContext& ctx, Id value); Id EmitFPNeg64(EmitContext& ctx, Id value); @@ -220,7 +222,12 @@ Id EmitFPCeil64(EmitContext& ctx, Id value); Id EmitFPTrunc16(EmitContext& ctx, Id value); Id EmitFPTrunc32(EmitContext& ctx, Id value); Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPFract(EmitContext& ctx, Id value); +Id EmitFPFract32(EmitContext& ctx, Id value); +Id EmitFPFract64(EmitContext& ctx, Id value); +Id EmitFPFrexpSig32(EmitContext& ctx, Id value); +Id EmitFPFrexpSig64(EmitContext& ctx, Id value); +Id EmitFPFrexpExp32(EmitContext& ctx, Id value); +Id EmitFPFrexpExp64(EmitContext& ctx, Id value); Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5c7278c6b..1ada2f1f9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -147,6 +147,10 @@ void EmitContext::DefineArithmeticTypes() { full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2"); full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2"); + frexp_result_f32 = Name(TypeStruct(F32[1], U32[1]), "frexp_result_f32"); + if (info.uses_fp64) { + frexp_result_f64 = Name(TypeStruct(F64[1], U32[1]), "frexp_result_f64"); + } } void EmitContext::DefineInterfaces() { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4e5e7dd3b..cd1293328 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -148,6 +148,8 @@ public: Id full_result_i32x2; Id full_result_u32x2; + Id frexp_result_f32; + Id frexp_result_f64; Id pi_x2; diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index f82f8fc1b..f4914577d 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -13,6 +13,11 @@ void Translator::EmitExport(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; + if (attrib == IR::Attribute::Depth && exp.en != 1) { + LOG_WARNING(Render_Vulkan, "Unsupported depth export"); + return; + } + const std::array vsrc = { IR::VectorReg(inst.src[0].code), IR::VectorReg(inst.src[1].code), diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 43f3ccef2..2f320a6c7 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -200,6 +200,11 @@ public: void V_BFREV_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); void V_FFBL_B32(const GcnInst& inst); + void V_FREXP_EXP_I32_F64(const GcnInst& inst); + void V_FREXP_MANT_F64(const GcnInst& inst); + void V_FRACT_F64(const GcnInst& inst); + void V_FREXP_EXP_I32_F32(const GcnInst& inst); + void V_FREXP_MANT_F32(const GcnInst& inst); void V_MOVRELD_B32(const GcnInst& inst); void V_MOVRELS_B32(const GcnInst& inst); void V_MOVRELSD_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 8149230db..3e9e677a7 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -179,6 +179,16 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_FFBH_U32(inst); case Opcode::V_FFBL_B32: return V_FFBL_B32(inst); + case Opcode::V_FREXP_EXP_I32_F64: + return V_FREXP_EXP_I32_F64(inst); + case Opcode::V_FREXP_MANT_F64: + return V_FREXP_MANT_F64(inst); + case Opcode::V_FRACT_F64: + return V_FRACT_F64(inst); + case Opcode::V_FREXP_EXP_I32_F32: + return V_FREXP_EXP_I32_F32(inst); + case Opcode::V_FREXP_MANT_F32: + return V_FREXP_MANT_F32(inst); case Opcode::V_MOVRELD_B32: return V_MOVRELD_B32(inst); case Opcode::V_MOVRELS_B32: @@ -733,7 +743,7 @@ void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { void Translator::V_FRACT_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; - SetDst(inst.dst[0], ir.Fract(src0)); + SetDst(inst.dst[0], ir.FPFract(src0)); } void Translator::V_TRUNC_F32(const GcnInst& inst) { @@ -822,6 +832,31 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FindILsb(src0)); } +void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F64(const GcnInst& inst) { + const IR::F64 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFrexpSig(src0)); +} + +void Translator::V_FRACT_F64(const GcnInst& inst) { + const IR::F32 src0{GetSrc64(inst.src[0])}; + SetDst64(inst.dst[0], ir.FPFract(src0)); +} + +void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpExp(src0)); +} + +void Translator::V_FREXP_MANT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.FPFrexpSig(src0)); +} + void Translator::V_MOVRELD_B32(const GcnInst& inst) { const IR::U32 src_val{GetSrc(inst.src[0])}; u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index b7ad3b36b..74b9c905d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.has_lod.Assign(flags.any(MimgModifier::Lod)); info.is_array.Assign(mimg.da); + info.is_unnormalized.Assign(mimg.unrm); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 78e7f2289..29b406699 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) { } } +F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPDiv32, a, b); + case Type::F64: + return Inst(Opcode::FPDiv64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) { if (a.Type() != b.Type() || a.Type() != c.Type()) { UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); @@ -855,8 +869,37 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) { } } -F32 IREmitter::Fract(const F32& value) { - return Inst(Opcode::FPFract, value); +F32F64 IREmitter::FPFract(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFract32, value); + case Type::F64: + return Inst(Opcode::FPFract64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPFrexpSig(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpSig32, value); + case Type::F64: + return Inst(Opcode::FPFrexpSig64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::FPFrexpExp(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFrexpExp32, value); + case Type::F64: + return Inst(Opcode::FPFrexpExp64, value); + default: + ThrowInvalidType(value.Type()); + } } U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index cbd3780de..f77e22b82 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -158,6 +158,7 @@ public: [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b); + [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b); [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c); [[nodiscard]] F32F64 FPAbs(const F32F64& value); @@ -179,7 +180,9 @@ public: [[nodiscard]] F32F64 FPFloor(const F32F64& value); [[nodiscard]] F32F64 FPCeil(const F32F64& value); [[nodiscard]] F32F64 FPTrunc(const F32F64& value); - [[nodiscard]] F32 Fract(const F32& value); + [[nodiscard]] F32F64 FPFract(const F32F64& value); + [[nodiscard]] F32F64 FPFrexpSig(const F32F64& value); + [[nodiscard]] U32 FPFrexpExp(const F32F64& value); [[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 0283ccd0f..8f40ed985 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -184,6 +184,8 @@ OPCODE(FPMin32, F32, F32, OPCODE(FPMin64, F64, F64, F64, ) OPCODE(FPMul32, F32, F32, F32, ) OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPDiv32, F32, F32, F32, ) +OPCODE(FPDiv64, F64, F64, F64, ) OPCODE(FPNeg32, F32, F32, ) OPCODE(FPNeg64, F64, F64, ) OPCODE(FPRecip32, F32, F32, ) @@ -208,7 +210,12 @@ OPCODE(FPCeil32, F32, F32, OPCODE(FPCeil64, F64, F64, ) OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPFract, F32, F32, ) +OPCODE(FPFract32, F32, F32, ) +OPCODE(FPFract64, F64, F64, ) +OPCODE(FPFrexpSig32, F32, F32, ) +OPCODE(FPFrexpSig64, F64, F64, ) +OPCODE(FPFrexpExp32, U32, F32, ) +OPCODE(FPFrexpExp64, U32, F64, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 89c5c78a0..398579ad4 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) { } } +IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { + boost::container::static_vector comps; + for (u32 i = 0; i < 4; i++) { + switch (sharp.GetSwizzle(i)) { + case AmdGpu::CompSwizzle::Zero: + comps.emplace_back(ir.Imm32(0.f)); + break; + case AmdGpu::CompSwizzle::One: + comps.emplace_back(ir.Imm32(1.f)); + break; + case AmdGpu::CompSwizzle::Red: + comps.emplace_back(ir.CompositeExtract(texel, 0)); + break; + case AmdGpu::CompSwizzle::Green: + comps.emplace_back(ir.CompositeExtract(texel, 1)); + break; + case AmdGpu::CompSwizzle::Blue: + comps.emplace_back(ir.CompositeExtract(texel, 2)); + break; + case AmdGpu::CompSwizzle::Alpha: + comps.emplace_back(ir.CompositeExtract(texel, 3)); + break; + default: + UNREACHABLE(); + } + } + return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); +}; + class Descriptors { public: explicit Descriptors(Info& info_) @@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + + // Apply dst_sel swizzle on formatted buffer instructions + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); + } else { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); + } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -420,26 +458,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors, const IR::Inst* producer, const u32 image_binding, const AmdGpu::Image& image) { // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - const u32 sampler_binding = [&] { + const auto [sampler_binding, sampler] = [&] -> std::pair { ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); const IR::Value& handle = producer->Arg(1); // Inline sampler resource. if (handle.IsImmediate()) { LOG_WARNING(Render_Vulkan, "Inline sampler detected"); - return descriptors.Add(SamplerResource{ + const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}; + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = std::numeric_limits::max(), - .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + .inline_sampler = inline_sampler, }); + return {binding, inline_sampler}; } // Normal sampler resource. const auto ssharp_handle = handle.InstRecursive(); const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); const auto ssharp = TrackSharp(ssharp_ud, info); - return descriptors.Add(SamplerResource{ + const auto binding = descriptors.Add(SamplerResource{ .sharp_idx = ssharp, .associated_image = image_binding, .disable_aniso = disable_aniso, }); + return {binding, info.ReadUdSharp(ssharp)}; }(); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -539,28 +580,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } }(); + const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized; + // Query dimensions of image if needed for normalization. + // We can't use the image sharp because it could be bound to a different image later. + const auto dimensions = + unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) + : IR::Value{}; + const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value { + const auto coord = get_addr_reg(idx); + if (unnormalized) { + // Normalize the coordinate for sampling, dividing by its corresponding dimension. + return ir.FPDiv(coord, + ir.BitCast(IR::U32{ir.CompositeExtract(dimensions, dim_idx)})); + } + return coord; + }; + // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler const IR::Value coords = [&] -> IR::Value { switch (image.GetType()) { case AmdGpu::ImageType::Color1D: // x addr_reg = addr_reg + 1; - return get_addr_reg(addr_reg - 1); + return get_coord(addr_reg - 1, 0); case AmdGpu::ImageType::Color1DArray: // x, slice [[fallthrough]]; case AmdGpu::ImageType::Color2D: // x, y addr_reg = addr_reg + 2; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)); + return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1)); case AmdGpu::ImageType::Color2DArray: // x, y, slice [[fallthrough]]; case AmdGpu::ImageType::Color2DMsaa: // x, y, frag - [[fallthrough]]; + addr_reg = addr_reg + 3; + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), + get_addr_reg(addr_reg - 1)); case AmdGpu::ImageType::Color3D: // x, y, z addr_reg = addr_reg + 3; - return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), - get_addr_reg(addr_reg - 1)); + return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), + get_coord(addr_reg - 1, 2)); case AmdGpu::ImageType::Cube: // x, y, face addr_reg = addr_reg + 3; - return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_addr_reg(addr_reg - 1), false, inst_info.is_array); default: UNREACHABLE(); @@ -711,6 +770,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2))); + } + if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 3004d2b86..ca2e9ceb9 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -40,7 +40,8 @@ union TextureInstInfo { BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; BitField<9, 1, u32> is_array; - BitField<10, 1, u32> is_gather; + BitField<10, 1, u32> is_unnormalized; + BitField<11, 1, u32> is_gather; }; union BufferInstInfo { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 2a3bd62f4..9b5dd8fa1 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,6 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; + u32 dst_select = 0; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -38,8 +39,12 @@ struct TextureBufferSpecialization { struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; + u32 dst_select = 0; - auto operator<=>(const ImageSpecialization&) const = default; + bool operator==(const ImageSpecialization& other) const { + return type == other.type && is_integer == other.is_integer && + (dst_select != 0 ? dst_select == other.dst_select : true); + } }; struct FMaskSpecialization { @@ -49,6 +54,12 @@ struct FMaskSpecialization { auto operator<=>(const FMaskSpecialization&) const = default; }; +struct SamplerSpecialization { + bool force_unnormalized = false; + + auto operator<=>(const SamplerSpecialization&) const = default; +}; + /** * Alongside runtime information, this structure also checks bound resources * for compatibility. Can be used as a key for storing shader permutations. @@ -67,6 +78,7 @@ struct StageSpecialization { boost::container::small_vector tex_buffers; boost::container::small_vector images; boost::container::small_vector fmasks; + boost::container::small_vector samplers; Backend::Bindings start{}; explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, @@ -96,17 +108,25 @@ struct StageSpecialization { ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + spec.dst_select = sharp.DstSelect(); }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.type = sharp.GetBoundType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + if (desc.is_storage) { + spec.dst_select = sharp.DstSelect(); + } }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.width = sharp.width; spec.height = sharp.height; }); + ForEachSharp(samplers, info->samplers, + [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { + spec.force_unnormalized = sharp.force_unnormalized; + }); } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { @@ -175,6 +195,11 @@ struct StageSpecialization { return false; } } + for (u32 i = 0; i < samplers.size(); i++) { + if (samplers[i] != other.samplers[i]) { + return false; + } + } return true; } }; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ca3b01612..9bc3454d8 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -431,6 +431,10 @@ struct Liverpool { return u64(z_read_base) << 8; } + u64 StencilAddress() const { + return u64(stencil_read_base) << 8; + } + u32 NumSamples() const { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ba87425f2..5d7417559 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -52,6 +52,10 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } + u32 DstSelect() const { + return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); + } + CompSwizzle GetSwizzle(u32 comp) const noexcept { const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; return static_cast(select[comp]); @@ -204,6 +208,11 @@ struct Image { return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); } + CompSwizzle GetSwizzle(u32 comp) const noexcept { + const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; + return static_cast(select[comp]); + } + static char SelectComp(u32 sel) { switch (sel) { case 0: diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index fa8d28ba0..ec0bb3bb7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -699,15 +699,6 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, default: break; } - } else if (comp_swap_reverse) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eA8B8G8R8UnormPack32; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eA8B8G8R8SrgbPack32; - default: - break; - } } return base_format; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9abf1b527..eb2ef3600 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -616,18 +616,24 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc}); image_id = texture_cache.FindImage(desc); - auto& image = texture_cache.GetImage(image_id); - if (image.binding.is_bound) { + auto* image = &texture_cache.GetImage(image_id); + if (image->depth_id) { + // If this image has an associated depth image, it's a stencil attachment. + // Redirect the access to the actual depth-stencil buffer. + image_id = image->depth_id; + image = &texture_cache.GetImage(image_id); + } + if (image->binding.is_bound) { // The image is already bound. In case if it is about to be used as storage we need // to force general layout on it. - image.binding.force_general |= image_desc.is_storage; + image->binding.force_general |= image_desc.is_storage; } - if (image.binding.is_target) { + if (image->binding.is_target) { // The image is already bound as target. Since we read and output to it need to force // general layout too. - image.binding.force_general = 1u; + image->binding.force_general = 1u; } - image.binding.is_bound = 1u; + image->binding.is_bound = 1u; } // Second pass to re-bind images that were updated after binding diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e7e1ce1da..03339d280 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -145,8 +145,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const ImageInfo& info_) : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()} { + if (info.pixel_format == vk::Format::eUndefined) { + return; + } mip_hashes.resize(info.resources.levels); - ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index a1b1b007f..473dd731e 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -92,6 +92,10 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } + void AssociateDepth(ImageId image_id) { + depth_id = image_id; + } + boost::container::small_vector GetBarriers( vk::ImageLayout dst_layout, vk::Flags dst_mask, vk::PipelineStageFlags2 dst_stage, std::optional subres_range); @@ -116,6 +120,7 @@ struct Image { VAddr track_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; + ImageId depth_id{}; // Resource state tracking struct { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 0ed36ee39..1445d41cd 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -298,6 +298,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; + stencil_addr = buffer.StencilAddress(); + stencil_size = pitch * size.height * sizeof(u8); + guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size_bytes = depth_slice_sz * num_slices; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index e12ae3be1..a657310a8 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -69,7 +69,7 @@ struct ImageInfo { } props{}; // Surface properties with impact on various calculation factors vk::Format pixel_format = vk::Format::eUndefined; - vk::ImageType type = vk::ImageType::e1D; + vk::ImageType type = vk::ImageType::e2D; SubresourceExtent resources; Extent3D size{1, 1, 1}; u32 num_bits{}; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 61f1aaafe..cc467e9a4 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -50,34 +50,6 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -bool IsIdentityMapping(u32 dst_sel, u32 num_components) { - return (num_components == 1 && dst_sel == 0b001'000'000'100) || - (num_components == 2 && dst_sel == 0b001'000'101'100) || - (num_components == 3 && dst_sel == 0b001'110'101'100) || - (num_components == 4 && dst_sel == 0b111'110'101'100); -} - -vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { - // BGRA - if (dst_sel == 0b111100101110) { - switch (format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eR8G8B8A8Snorm: - return vk::Format::eB8G8R8A8Snorm; - case vk::Format::eR8G8B8A8Uint: - return vk::Format::eB8G8R8A8Uint; - case vk::Format::eR8G8B8A8Sint: - return vk::Format::eB8G8R8A8Sint; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - default: - break; - } - } - return format; -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); @@ -120,17 +92,6 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso mapping.b = ConvertComponentSwizzle(image.dst_sel_z); mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } - // Check for unfortunate case of storage images being swizzled - const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt()); - const u32 dst_sel = image.DstSelect(); - if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) { - if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) { - format = new_format; - return; - } - LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps, - image.DstSelectName()); - } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { @@ -170,7 +131,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } - if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) { + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Uint) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eStencil; } diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index e47f53abf..9f4bc7a7e 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample .minLod = sampler.MinLod(), .maxLod = sampler.MaxLod(), .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type), - .unnormalizedCoordinates = bool(sampler.force_unnormalized), + .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations. }; auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci); ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}", diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 153314d2b..897d6f67e 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -443,6 +443,27 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { } } + // If there is a stencil attachment, link depth and stencil. + if (desc.info.stencil_addr != 0) { + ImageId stencil_id{}; + ForEachImageInRegion(desc.info.stencil_addr, desc.info.stencil_size, + [&](ImageId image_id, Image& image) { + if (image.info.guest_address == desc.info.stencil_addr) { + stencil_id = image_id; + } + }); + if (!stencil_id) { + ImageInfo info{}; + info.guest_address = desc.info.stencil_addr; + info.guest_size_bytes = desc.info.stencil_size; + info.size = desc.info.size; + stencil_id = slot_images.insert(instance, scheduler, info); + RegisterImage(stencil_id); + } + Image& image = slot_images[stencil_id]; + image.AssociateDepth(image_id); + } + return RegisterImageView(image_id, desc.view_info); }