diff --git a/CMakeLists.txt b/CMakeLists.txt index f55767611..9b10d0e5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -597,6 +597,8 @@ set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp src/core/libraries/move/move.h src/core/libraries/ulobjmgr/ulobjmgr.cpp src/core/libraries/ulobjmgr/ulobjmgr.h + src/core/libraries/signin_dialog/signindialog.cpp + src/core/libraries/signin_dialog/signindialog.h ) set(DEV_TOOLS src/core/devtools/layer.cpp diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 867d62916..622af93cc 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -137,6 +137,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, NpParty) \ SUB(Lib, Zlib) \ SUB(Lib, Hmd) \ + SUB(Lib, SigninDialog) \ CLS(Frontend) \ CLS(Render) \ SUB(Render, Vulkan) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index e5714a81a..27a87e082 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -104,6 +104,7 @@ enum class Class : u8 { Lib_NpParty, ///< The LibSceNpParty implementation Lib_Zlib, ///< The LibSceZlib implementation. Lib_Hmd, ///< The LibSceHmd implementation. + Lib_SigninDialog, ///< The LibSigninDialog implementation. Frontend, ///< Emulator UI Render, ///< Video Core Render_Vulkan, ///< Vulkan backend diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..d7f3efc75 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -19,8 +19,6 @@ enum class MemoryPermission : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) -constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; - constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index c8106b270..8937ef04b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -464,9 +464,8 @@ static std::pair TryPatch(u8* code, PatchModule* module) { if (needs_trampoline && instruction.length < 5) { // Trampoline is needed but instruction is too short to patch. - // Return false and length to fall back to the illegal instruction handler, - // or to signal to AOT compilation that this instruction should be skipped and - // handled at runtime. + // Return false and length to signal to AOT compilation that this instruction + // should be skipped and handled at runtime. return std::make_pair(false, instruction.length); } @@ -512,136 +511,137 @@ static std::pair TryPatch(u8* code, PatchModule* module) { #if defined(ARCH_X86_64) +static bool Is4ByteExtrqOrInsertq(void* code_address) { + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // extrq + } else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // insertq + } else { + return false; + } +} + static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { - ZydisDecodedInstruction instruction; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - const auto status = - Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + // We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed + // out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we + // don't care about. We can get information about the instruction a lot faster by writing a mini + // decoder here, since we know it is definitely an extrq or an insertq. If for some reason we + // need to interpret more instructions in the future (I don't see why we would), we can revert + // to using Zydis. + ZydisMnemonic mnemonic; + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66) { + mnemonic = ZYDIS_MNEMONIC_EXTRQ; + } else if (bytes[0] == 0xF2) { + mnemonic = ZYDIS_MNEMONIC_INSERTQ; + } else { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", + fmt::ptr(code_address), + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + return false; + } - switch (instruction.mnemonic) { + ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79); + + // Note: It's guaranteed that there's no REX prefix in these instructions checked by + // Is4ByteExtrqOrInsertq + u8 modrm = bytes[3]; + u8 rm = modrm & 0b111; + u8 reg = (modrm >> 3) & 0b111; + u8 mod = (modrm >> 6) & 0b11; + + ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only + + int dstIndex = reg; + int srcIndex = rm; + + switch (mnemonic) { case ZYDIS_MNEMONIC_EXTRQ: { - bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = lowQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && - operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[1].reg.value <= ZYDIS_REGISTER_XMM15, - "Unexpected operand types for EXTRQ instruction"); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = lowQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (lowQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "extrq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordDst >>= index; - lowQWordDst &= mask; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (lowQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "extrq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordDst >>= index; + lowQWordDst &= mask; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } case ZYDIS_MNEMONIC_INSERTQ: { - bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, - "INSERTQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc, highQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = highQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED && - operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED, - "operands 2 and 3 must be unused for register form."); - - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER, - "operands 0 and 1 must be registers."); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc, highQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = highQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (highQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "insertq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordSrc &= mask; - lowQWordDst &= ~(mask << index); - lowQWordDst |= lowQWordSrc << index; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (highQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "insertq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordSrc &= mask; + lowQWordDst &= ~(mask << index); + lowQWordDst |= lowQWordSrc << index; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } default: { - LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", - fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic)); - return false; + UNREACHABLE(); } } @@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address static bool PatchesIllegalInstructionHandler(void* context) { void* code_address = Common::GetRip(context); - if (!TryPatchJit(code_address)) { + if (Is4ByteExtrqOrInsertq(code_address)) { + // The instruction is not big enough for a relative jump, don't try to patch it and pass it + // to our illegal instruction interpreter directly return TryExecuteIllegalInstruction(context, code_address); + } else { + if (!TryPatchJit(code_address)) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address, + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + } } + return true; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 25ac4921c..f2f40e0e3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -505,9 +505,10 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 flags) { LOG_TRACE(Lib_GnmDriver, "called"); - if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) && - (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) && - (instance_sgpr_offset < 0x10u)) { + if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) && + (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { cmdbuf = WriteHeader(cmdbuf, 2); cmdbuf = WriteBody(cmdbuf, 0u); @@ -535,10 +536,33 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - UNREACHABLE(); - return ORBIS_OK; +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 11) && (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { + + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 6, PM4ShaderType::ShaderGraphics, predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = max_count; + cmdbuf[4] = sizeof(DrawIndexedIndirectArgs); + cmdbuf[5] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0; + + cmdbuf += 6; + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 94d06c85f..a3d4968d3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -51,7 +51,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 max_count, u64 count_addr, u32 shader_stage, u32 vertex_sgpr_offset, u32 instance_sgpr_offset, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, u32 flags); diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 8a0c91479..495ddc52f 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -126,9 +126,6 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize) { LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags); - if (!addr) { - return ORBIS_KERNEL_ERROR_EACCES; - } auto* memory = Core::Memory::Instance(); return memory->VirtualQuery(std::bit_cast(addr), flags, info); } @@ -136,7 +133,6 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment) { LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}, alignment = {:#x}", fmt::ptr(*addr), len, flags, alignment); - if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -155,9 +151,12 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u auto* memory = Core::Memory::Instance(); const VAddr in_addr = reinterpret_cast(*addr); const auto map_flags = static_cast(flags); - memory->Reserve(addr, in_addr, len, map_flags, alignment); - return ORBIS_OK; + s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment); + if (result == 0) { + LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); + } + return result; } int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags, @@ -172,10 +171,12 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (!Common::Is16KBAligned(directMemoryStart)) { LOG_ERROR(Kernel_Vmm, "Start address is not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (alignment != 0) { if ((!std::has_single_bit(alignment) && !Common::Is16KBAligned(alignment))) { LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!"); @@ -183,14 +184,19 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i } } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); auto* memory = Core::Memory::Instance(); const auto ret = - memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", false, - directMemoryStart, alignment); + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name, + false, directMemoryStart, alignment); LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); return ret; @@ -199,7 +205,8 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags, s64 directMemoryStart, u64 alignment) { LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory"); - return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, ""); + return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, + "anon"); } s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, @@ -210,17 +217,16 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t return ORBIS_KERNEL_ERROR_EINVAL; } - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr_in_out); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); @@ -236,7 +242,7 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, int flags) { - return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); + return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); } int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { @@ -304,7 +310,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT: { result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, entries[i].protection, flags, - static_cast(entries[i].offset), 0, ""); + static_cast(entries[i].offset), 0, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " "result = {}", @@ -326,7 +332,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE: { result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length, - entries[i].protection, flags, ""); + entries[i].protection, flags, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, type = {}, " "result = {}", @@ -356,16 +362,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + auto* memory = Core::Memory::Instance(); memory->NameVirtualRange(std::bit_cast(addr), len, name); return ORBIS_OK; diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 400b6c3fc..6acb559d1 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -47,6 +47,8 @@ enum MemoryOpTypes : u32 { ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 }; +constexpr u32 ORBIS_KERNEL_MAXIMUM_NAME_LENGTH = 32; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -59,14 +61,12 @@ struct OrbisVirtualQueryInfo { size_t offset; s32 protection; s32 memory_type; - union { - BitField<0, 1, u32> is_flexible; - BitField<1, 1, u32> is_direct; - BitField<2, 1, u32> is_stack; - BitField<3, 1, u32> is_pooled; - BitField<4, 1, u32> is_committed; - }; - std::array name; + u32 is_flexible : 1; + u32 is_direct : 1; + u32 is_stack : 1; + u32 is_pooled : 1; + u32 is_committed : 1; + char name[ORBIS_KERNEL_MAXIMUM_NAME_LENGTH]; }; struct OrbisKernelBatchMapEntry { diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index c4127ecf2..e791e74bf 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -289,7 +289,12 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt /* Create thread */ new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); + ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); + + if (attr != nullptr && *attr != nullptr && (*attr)->cpuset != nullptr) { + new_thread->SetAffinity((*attr)->cpuset); + } if (ret) { *thread = nullptr; } @@ -521,6 +526,69 @@ int PS4_SYSV_ABI posix_pthread_setcancelstate(PthreadCancelState state, return 0; } +int Pthread::SetAffinity(const Cpuset* cpuset) { + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count < 8) { + return 0; + } + if (cpuset == nullptr) { + return POSIX_EINVAL; + } + + u64 mask = cpuset->bits; + + uintptr_t handle = native_thr.GetHandle(); + if (handle == 0) { + return POSIX_ESRCH; + } + + // We don't use this currently because some games gets performance problems + // when applying affinity even on strong hardware + /* + #ifdef _WIN64 + DWORD_PTR affinity_mask = static_cast(mask); + if (!SetThreadAffinityMask(reinterpret_cast(handle), affinity_mask)) { + return POSIX_EINVAL; + } + + #elif defined(__linux__) + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + + u64 mask = cpuset->bits; + for (int cpu = 0; cpu < std::min(64, CPU_SETSIZE); ++cpu) { + if (mask & (1ULL << cpu)) { + CPU_SET(cpu, &cpu_set); + } + } + + int result = + pthread_setaffinity_np(static_cast(handle), sizeof(cpu_set_t), &cpu_set); + if (result != 0) { + return POSIX_EINVAL; + } + #endif + */ + return 0; +} + +int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize, + const Cpuset* cpusetp) { + if (thread == nullptr || cpusetp == nullptr) { + return POSIX_EINVAL; + } + thread->attr.cpusetsize = cpusetsize; + return thread->SetAffinity(cpusetp); +} + +int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const Cpuset mask) { + int result = posix_pthread_setaffinity_np(thread, 0x10, &mask); + if (result != 0) { + return ErrnoToSceKernelError(result); + } + return 0; +} + void RegisterThread(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); @@ -544,6 +612,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np); // Orbis LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_once)); @@ -566,6 +635,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio)); LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors); LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity) } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index 089156776..09eed11b8 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -332,6 +332,8 @@ struct Pthread { return true; } } + + int SetAffinity(const Cpuset* cpuset); }; using PthreadT = Pthread*; diff --git a/src/core/libraries/libc_internal/printf.h b/src/core/libraries/libc_internal/printf.h index fe63481a0..9c22e922c 100644 --- a/src/core/libraries/libc_internal/printf.h +++ b/src/core/libraries/libc_internal/printf.h @@ -56,7 +56,6 @@ #include #include -#include #include #include #include diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index 3f5baf640..3826ff793 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -45,6 +45,7 @@ #include "core/libraries/save_data/savedata.h" #include "core/libraries/screenshot/screenshot.h" #include "core/libraries/share_play/shareplay.h" +#include "core/libraries/signin_dialog/signindialog.h" #include "core/libraries/system/commondialog.h" #include "core/libraries/system/msgdialog.h" #include "core/libraries/system/posix.h" @@ -120,6 +121,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Hmd::RegisterlibSceHmd(sym); Libraries::DiscMap::RegisterlibSceDiscMap(sym); Libraries::Ulobjmgr::RegisterlibSceUlobjmgr(sym); + Libraries::SigninDialog::RegisterlibSceSigninDialog(sym); } } // namespace Libraries diff --git a/src/core/libraries/signin_dialog/signindialog.cpp b/src/core/libraries/signin_dialog/signindialog.cpp new file mode 100644 index 000000000..0e4eb63a2 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// Generated By moduleGenerator +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" +#include "signindialog.h" + +namespace Libraries::SigninDialog { + +s32 PS4_SYSV_ABI sceSigninDialogInitialize() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogOpen() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +Status PS4_SYSV_ABI sceSigninDialogGetStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +Status PS4_SYSV_ABI sceSigninDialogUpdateStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +s32 PS4_SYSV_ABI sceSigninDialogGetResult() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogClose() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogTerminate() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("mlYGfmqE3fQ", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogInitialize); + LIB_FUNCTION("JlpJVoRWv7U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogOpen); + LIB_FUNCTION("2m077aeC+PA", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetStatus); + LIB_FUNCTION("Bw31liTFT3A", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogUpdateStatus); + LIB_FUNCTION("nqG7rqnYw1U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetResult); + LIB_FUNCTION("M3OkENHcyiU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogClose); + LIB_FUNCTION("LXlmS6PvJdU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogTerminate); +}; + +} // namespace Libraries::SigninDialog diff --git a/src/core/libraries/signin_dialog/signindialog.h b/src/core/libraries/signin_dialog/signindialog.h new file mode 100644 index 000000000..8726ad1f6 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +enum class Status : u32 { + NONE = 0, + INITIALIZED = 1, + RUNNING = 2, + FINISHED = 3, +}; + +namespace Libraries::SigninDialog { + +s32 PS4_SYSV_ABI sceSigninDialogInitialize(); +s32 PS4_SYSV_ABI sceSigninDialogOpen(); +Status PS4_SYSV_ABI sceSigninDialogGetStatus(); +Status PS4_SYSV_ABI sceSigninDialogUpdateStatus(); +s32 PS4_SYSV_ABI sceSigninDialogGetResult(); +s32 PS4_SYSV_ABI sceSigninDialogClose(); +s32 PS4_SYSV_ABI sceSigninDialogTerminate(); + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::SigninDialog diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 494ffa70c..9861e813a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -75,7 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { // Clamp size to the remaining size of the current VMA. auto vma = FindVMA(virtual_addr); - ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr); + ASSERT_MSG(vma->second.Contains(virtual_addr, 0), + "Attempted to access invalid GPU address {:#x}", virtual_addr); u64 clamped_size = vma->second.base + vma->second.size - virtual_addr; ++vma; @@ -96,6 +97,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { const VAddr virtual_addr = std::bit_cast(address); const auto& vma = FindVMA(virtual_addr)->second; + ASSERT_MSG(vma.Contains(virtual_addr, 0), + "Attempting to access out of bounds memory at address {:#x}", virtual_addr); if (vma.type != VMAType::Direct) { return false; } @@ -145,10 +148,12 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto mapping_end = mapping_start + size; // Find the first free, large enough dmem area in the range. - while ((!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) && - dmem_area != dmem_map.end()) { + while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) { // The current dmem_area isn't suitable, move to the next one. dmem_area++; + if (dmem_area == dmem_map.end()) { + break; + } // Update local variables based on the new dmem_area mapping_start = Common::AlignUp(dmem_area->second.base, alignment); @@ -172,7 +177,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; auto dmem_area = CarveDmemArea(phys_addr, size); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. std::vector> remove_list; @@ -216,12 +220,18 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -229,7 +239,7 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::PoolReserved; MergeAdjacent(vma_map, new_vma_handle); @@ -247,19 +257,25 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - auto& vma = FindVMA(mapped_addr)->second; + auto vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. if (vma.IsMapped()) { UnmapMemoryImpl(mapped_addr, size); vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -267,7 +283,7 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::Reserved; MergeAdjacent(vma_map, new_vma_handle); @@ -288,7 +304,9 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); // Perform the mapping. void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false); @@ -302,7 +320,10 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) new_vma.is_exec = false; new_vma.phys_base = 0; - rasterizer->MapMemory(mapped_addr, size); + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } + return ORBIS_OK; } @@ -325,15 +346,34 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. - const auto& vma = FindVMA(mapped_addr)->second; - const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + auto vma = FindVMA(mapped_addr)->second; + size_t remaining_size = vma.base + vma.size - mapped_addr; + // There's a possible edge case where we're mapping to a partially reserved range. + // To account for this, unmap any reserved areas within this mapping range first. + auto unmap_addr = mapped_addr; + auto unmap_size = size; + while (!vma.IsMapped() && unmap_addr < mapped_addr + size && remaining_size < size) { + auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size); + unmap_addr += unmapped; + unmap_size -= unmapped; + vma = FindVMA(unmap_addr)->second; + } + + // This should return SCE_KERNEL_ERROR_ENOMEM but rarely happens. + vma = FindVMA(mapped_addr)->second; + remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Perform the mapping. @@ -353,7 +393,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M if (type == VMAType::Flexible) { flexible_usage += size; } - rasterizer->MapMemory(mapped_addr, size); + + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } return ORBIS_OK; } @@ -366,12 +409,18 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size_aligned, 1); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Map the file. @@ -404,7 +453,9 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { const auto start_in_vma = virtual_addr - vma_base_addr; const auto type = vma_base.type; - rasterizer->UnmapMemory(virtual_addr, size); + if (IsValidGpuMapping(virtual_addr, size)) { + rasterizer->UnmapMemory(virtual_addr, size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, size); @@ -444,7 +495,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma if (type == VMAType::Flexible) { flexible_usage -= adjusted_size; } - rasterizer->UnmapMemory(virtual_addr, adjusted_size); + + if (IsValidGpuMapping(virtual_addr, adjusted_size)) { + rasterizer->UnmapMemory(virtual_addr, adjusted_size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, adjusted_size); @@ -471,6 +525,8 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { do { auto it = FindVMA(virtual_addr + unmapped_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr + unmapped_bytes, 0), + "Address {:#x} is out of bounds", virtual_addr + unmapped_bytes); auto unmapped = UnmapBytesFromEntry(virtual_addr + unmapped_bytes, vma_base, size - unmapped_bytes); ASSERT_MSG(unmapped > 0, "Failed to unmap memory, progress is impossible"); @@ -485,7 +541,10 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr const auto it = FindVMA(addr); const auto& vma = it->second; - ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); + if (!vma.Contains(addr, 0) || vma.IsFree()) { + LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr); + return ORBIS_KERNEL_ERROR_EACCES; + } if (start != nullptr) { *start = reinterpret_cast(vma.base); @@ -555,6 +614,8 @@ s32 MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) { do { auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(addr + protected_bytes, 0), "Address {:#x} is out of bounds", + addr + protected_bytes); auto result = 0; result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); if (result < 0) { @@ -571,8 +632,16 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { std::scoped_lock lk{mutex}; - auto it = FindVMA(addr); - if (it->second.type == VMAType::Free && flags == 1) { + // FindVMA on addresses before the vma_map return garbage data. + auto query_addr = + addr < impl.SystemManagedVirtualBase() ? impl.SystemManagedVirtualBase() : addr; + if (addr < query_addr && flags == 0) { + LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); + return ORBIS_KERNEL_ERROR_EACCES; + } + auto it = FindVMA(query_addr); + + while (it->second.type == VMAType::Free && flags == 1 && it != --vma_map.end()) { ++it; } if (it->second.type == VMAType::Free) { @@ -585,15 +654,17 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->end = vma.base + vma.size; info->offset = vma.phys_base; info->protection = static_cast(vma.prot); - info->is_flexible.Assign(vma.type == VMAType::Flexible); - info->is_direct.Assign(vma.type == VMAType::Direct); - info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.IsMapped()); - vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); + info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0; + info->is_direct = vma.type == VMAType::Direct ? 1 : 0; + info->is_stack = vma.type == VMAType::Stack ? 1 : 0; + info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0; + info->is_committed = vma.IsMapped() ? 1 : 0; + + strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH); + if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); - ASSERT(dmem_it != dmem_map.end()); + ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!"); info->memory_type = dmem_it->second.memory_type; } else { info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION; @@ -607,11 +678,11 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); - while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) { + while (dmem_area != --dmem_map.end() && dmem_area->second.is_free && find_next) { dmem_area++; } - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to query!"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -691,36 +762,56 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) virtual_addr = min_search_address; } + // If the requested address is beyond the maximum our code can handle, throw an assert + auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); + ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", + virtual_addr); + auto it = FindVMA(virtual_addr); - ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!"); // If the VMA is free and contains the requested mapping we are done. if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { return virtual_addr; } + // Search for the first free VMA that fits our mapping. - const auto is_suitable = [&] { + while (it != vma_map.end()) { if (!it->second.IsFree()) { - return false; + it++; + continue; } + const auto& vma = it->second; virtual_addr = Common::AlignUp(vma.base, alignment); // Sometimes the alignment itself might be larger than the VMA. if (virtual_addr > vma.base + vma.size) { - return false; + it++; + continue; } + + // Make sure the address is within our defined bounds + if (virtual_addr >= max_search_address) { + // There are no free mappings within our safely usable address space. + break; + } + + // If there's enough space in the VMA, return the address. const size_t remaining_size = vma.base + vma.size - virtual_addr; - return remaining_size >= size; - }; - while (!is_suitable()) { - ++it; + if (remaining_size >= size) { + return virtual_addr; + } + it++; } - return virtual_addr; + + // Couldn't find a suitable VMA, return an error. + LOG_ERROR(Kernel_Vmm, "Couldn't find a free mapping for address {:#x}, size {:#x}", + virtual_addr, size); + return -1; } MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); - ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); + ASSERT_MSG(vma_handle->second.Contains(virtual_addr, 0), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); @@ -749,7 +840,7 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); - ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); + ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); @@ -804,7 +895,7 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, auto dmem_area = FindDmemArea(addr); - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (addr > dmem_area->second.GetEnd() || dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to check type!"); return ORBIS_KERNEL_ERROR_ENOENT; } diff --git a/src/core/memory.h b/src/core/memory.h index a6a55e288..3a204eb96 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -157,6 +157,12 @@ public: return impl.SystemReservedVirtualBase(); } + bool IsValidGpuMapping(VAddr virtual_addr, u64 size) { + // The PS4's GPU can only handle 40 bit addresses. + const VAddr max_gpu_address{0x10000000000}; + return virtual_addr + size < max_gpu_address; + } + bool IsValidAddress(const void* addr) const noexcept { const VAddr virtual_addr = reinterpret_cast(addr); const auto end_it = std::prev(vma_map.end()); @@ -186,7 +192,7 @@ public: int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot); int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, VMAType type, std::string_view name = "", + MemoryMapFlags flags, VMAType type, std::string_view name = "anon", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, diff --git a/src/core/module.cpp b/src/core/module.cpp index cbe44457c..f31bbed6c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -19,8 +19,7 @@ namespace Core { using EntryFunc = PS4_SYSV_ABI int (*)(size_t args, const void* argp, void* param); -static u64 LoadOffset = CODE_BASE_OFFSET; -static constexpr u64 CODE_BASE_INCR = 0x010000000u; +static constexpr u64 ModuleLoadBase = 0x800000000; static u64 GetAlignedSize(const elf_program_header& phdr) { return (phdr.p_align != 0 ? (phdr.p_memsz + (phdr.p_align - 1)) & ~(phdr.p_align - 1) @@ -84,7 +83,7 @@ static std::string StringToNid(std::string_view symbol) { } Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index) - : memory{memory_}, file{file_}, name{file.stem().string()} { + : memory{memory_}, file{file_}, name{file.filename().string()} { elf.Open(file); if (elf.IsElfFile()) { LoadModuleToMemory(max_tls_index); @@ -113,10 +112,8 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { // Map module segments (and possible TLS trampolines) void** out_addr = reinterpret_cast(&base_virtual_addr); - memory->MapMemory(out_addr, memory->SystemReservedVirtualBase() + LoadOffset, - aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, - MemoryMapFlags::Fixed, VMAType::Code, name, true); - LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); + memory->MapMemory(out_addr, ModuleLoadBase, aligned_base_size + TrampolineSize, + MemoryProt::CpuReadWrite, MemoryMapFlags::NoFlags, VMAType::Code, name, true); LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); #ifdef ARCH_X86_64 @@ -229,7 +226,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LOG_INFO(Core_Linker, "program entry addr ..........: {:#018x}", entry_addr); if (MemoryPatcher::g_eboot_address == 0) { - if (name == "eboot") { + if (name == "eboot.bin") { MemoryPatcher::g_eboot_address = base_virtual_addr; MemoryPatcher::g_eboot_image_size = base_size; MemoryPatcher::OnGameLoaded(); diff --git a/src/core/tls.h b/src/core/tls.h index 6edd6a297..46ca8153b 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,6 +5,8 @@ #include "common/types.h" +void* memset(void* ptr, int value, size_t num); + namespace Xbyak { class CodeGenerator; } @@ -41,9 +43,18 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); +template +__attribute__((optnone)) void ClearStack() { + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + template ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { EnsureThreadInitialized(); + // clear stack to avoid trash from EnsureThreadInitialized + ClearStack<13_KB>(); return func(std::forward(args)...); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index cfc01c58f..5639bc56a 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -143,6 +143,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_O: case Opcode::IMAGE_SAMPLE_L_O: + case Opcode::IMAGE_SAMPLE_B_O: case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_C_LZ_O: diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 784f8b4d2..12e48c8e4 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -281,6 +281,11 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept // Fall back to null image if unbound. return AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + return AmdGpu::Image::NullDepth(); + } return image; } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 1de255e4d..cc0bf83d3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -363,6 +363,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); image = AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (inst_info.is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + LOG_ERROR(Render_Vulkan, "Shader compiled using non-depth image with depth instruction!"); + image = AmdGpu::Image::NullDepth(); + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 517392b98..b8ed42f5b 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -169,10 +169,10 @@ static constexpr u32 MaxColorBuffers = 8; struct PsColorBuffer { AmdGpu::NumberFormat num_format : 4; - AmdGpu::NumberConversion num_conversion : 2; + AmdGpu::NumberConversion num_conversion : 3; AmdGpu::Liverpool::ShaderExportFormat export_format : 4; u32 needs_unorm_fixup : 1; - u32 pad : 21; + u32 pad : 20; AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 967b952c6..4c8e3367a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -455,14 +455,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; - const auto size = sizeof(DrawIndirectArgs); + const auto stride = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address)); - rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; @@ -471,7 +471,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_index_indirect->data_offset; - const auto size = sizeof(DrawIndexedIndirectArgs); + const auto stride = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -479,25 +479,46 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirect", cmd_address)); - rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; } - case PM4ItOpcode::DrawIndexIndirectCountMulti: { + case PM4ItOpcode::DrawIndexIndirectMulti: { const auto* draw_index_indirect = reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin( + fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address)); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, 0); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DrawIndexIndirectCountMulti: { + const auto* draw_index_indirect = + reinterpret_cast(header); + const auto offset = draw_index_indirect->data_offset; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect( - true, indirect_args_addr, offset, draw_index_indirect->stride, - draw_index_indirect->count, draw_index_indirect->countAddr); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, + draw_index_indirect->count_indirect_enable.Value() + ? draw_index_indirect->count_addr + : 0); rasterizer->ScopeMarkerEnd(); } break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index ae1d32e00..6b55f5b65 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -860,6 +860,24 @@ struct PM4CmdDrawIndexIndirect { }; struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; + u32 count; ///< Count of data structures to loop through before going to next packet + u32 stride; ///< Stride in memory from one data structure to the next + u32 draw_initiator; ///< Draw Initiator Register +}; + +struct PM4CmdDrawIndexIndirectCountMulti { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -874,14 +892,14 @@ struct PM4CmdDrawIndexIndirectMulti { }; union { u32 dw4; - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<0, 16, u32> draw_index_loc; ///< register offset to write the Draw Index count BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory + count_indirect_enable; ///< Indicates the data structure count is in memory BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC + draw_index_enable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet - u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set + u64 count_addr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set u32 stride; ///< Stride in memory from one data structure to the next u32 draw_initiator; ///< Draw Initiator Register }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index c387c7bf2..9060074fb 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -219,6 +219,19 @@ struct Image { return image; } + static constexpr Image NullDepth() { + Image image{}; + image.data_format = u64(DataFormat::Format32); + image.num_format = u64(NumberFormat::Float); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 99f225d79..1004d850f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -338,6 +338,7 @@ bool Instance::CreateDevice() { .geometryShader = features.geometryShader, .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, + .multiDrawIndirect = features.multiDrawIndirect, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, .depthBounds = features.depthBounds, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4caa781b9..e7b42a34b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -618,8 +618,9 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + auto& null_image_view = + texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } } else { diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 23c703d23..6a17490bf 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -34,8 +34,6 @@ struct ImageViewInfo { struct Image; -constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0}; - struct ImageView { ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, ImageId image_id); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 047bb3dfe..82f4d6413 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -8,6 +8,7 @@ #include "common/debug.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/host_compatibility.h" @@ -23,31 +24,41 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { + // Create basic null image at fixed image ID. + const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); + ASSERT(null_id.index == NULL_IMAGE_ID.index); +} + +TextureCache::~TextureCache() = default; + +ImageId TextureCache::GetNullImage(const vk::Format format) { + const auto existing_image = null_images.find(format); + if (existing_image != null_images.end()) { + return existing_image->second; + } + ImageInfo info{}; - info.pixel_format = vk::Format::eR8G8B8A8Unorm; + info.pixel_format = format; info.type = vk::ImageType::e2D; - info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled); + info.tiling_idx = static_cast(AmdGpu::TilingMode::Texture_MicroTiled); info.num_bits = 32; info.UpdateSize(); + const ImageId null_id = slot_images.insert(instance, scheduler, info); - ASSERT(null_id.index == NULL_IMAGE_ID.index); auto& img = slot_images[null_id]; + const vk::Image& null_image = img.image; - Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); + Vulkan::SetObjectName(instance.GetDevice(), null_image, + fmt::format("Null Image ({})", vk::to_string(format))); + img.flags = ImageFlagBits::Empty; img.track_addr = img.info.guest_address; img.track_addr_end = img.info.guest_address + img.info.guest_size; - ImageViewInfo view_info; - const auto null_view_id = - slot_image_views.insert(instance, view_info, slot_images[null_id], null_id); - ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index); - const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get(); - Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View"); + null_images.emplace(format, null_id); + return null_id; } -TextureCache::~TextureCache() = default; - void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -296,7 +307,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { const auto& info = desc.info; if (info.guest_address == 0) [[unlikely]] { - return NULL_IMAGE_ID; + return GetNullImage(info.pixel_format); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f262768ea..b6bf88958 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -246,6 +246,9 @@ private: } } + /// Gets or creates a null image for a particular format. + ImageId GetNullImage(vk::Format format); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -285,6 +288,7 @@ private: Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; + tsl::robin_map null_images; PageTable page_table; std::mutex mutex;