diff --git a/CMakeLists.txt b/CMakeLists.txt index d92638ab0..f522837fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1216,7 +1216,7 @@ if (APPLE) if (ARCHITECTURE STREQUAL "x86_64") # Reserve system-managed memory space. - target_link_options(shadps4 PRIVATE -Wl,-ld_classic,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,SYSTEM_MANAGED,0x400000,-segaddr,SYSTEM_RESERVED,0x7FFFFC000,-image_base,0x20000000000) + target_link_options(shadps4 PRIVATE -Wl,-ld_classic,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,SYSTEM_MANAGED,0x400000,-segaddr,SYSTEM_RESERVED,0x7FFFFC000,-segaddr,USER_AREA,0x7000000000,-image_base,0x700000000000) endif() # Replacement for std::chrono::time_zone @@ -1266,6 +1266,13 @@ if (WIN32) else() target_link_options(shadps4 PRIVATE -Wl,--stack,2097152) endif() + + # Change base image address + if (MSVC) + target_link_options(shadps4 PRIVATE /BASE:0x700000000000) + else() + target_link_options(shadps4 PRIVATE -Wl,--image-base=0x700000000000) + endif() endif() if (WIN32) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 5ded48df4..8fd857b0f 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include "common/alignment.h" #include "common/arch.h" #include "common/assert.h" @@ -24,11 +23,38 @@ // Reserve space for the system address space using a zerofill section. asm(".zerofill SYSTEM_MANAGED,SYSTEM_MANAGED,__SYSTEM_MANAGED,0x7FFBFC000"); asm(".zerofill SYSTEM_RESERVED,SYSTEM_RESERVED,__SYSTEM_RESERVED,0x7C0004000"); +asm(".zerofill USER_AREA,USER_AREA,__USER_AREA,0x5F9000000000"); #endif namespace Core { -static size_t BackingSize = ORBIS_KERNEL_TOTAL_MEM_DEV_PRO; +// Constants used for mapping address space. +constexpr VAddr SYSTEM_MANAGED_MIN = 0x400000ULL; +constexpr VAddr SYSTEM_MANAGED_MAX = 0x7FFFFBFFFULL; +constexpr VAddr SYSTEM_RESERVED_MIN = 0x7FFFFC000ULL; +#if defined(__APPLE__) && defined(ARCH_X86_64) +// Commpage ranges from 0xFC0000000 - 0xFFFFFFFFF, so decrease the system reserved maximum. +constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL; +// GPU-reserved memory ranges from 0x1000000000 - 0x6FFFFFFFFF, so increase the user minimum. +constexpr VAddr USER_MIN = 0x7000000000ULL; +#else +constexpr VAddr SYSTEM_RESERVED_MAX = 0xFFFFFFFFFULL; +constexpr VAddr USER_MIN = 0x1000000000ULL; +#endif +#if defined(__linux__) +// Linux maps the shadPS4 executable around here, so limit the user maximum +constexpr VAddr USER_MAX = 0x54FFFFFFFFFFULL; +#else +constexpr VAddr USER_MAX = 0x5FFFFFFFFFFFULL; +#endif + +// Constants for the sizes of the ranges in address space. +static constexpr u64 SystemManagedSize = SYSTEM_MANAGED_MAX - SYSTEM_MANAGED_MIN + 1; +static constexpr u64 SystemReservedSize = SYSTEM_RESERVED_MAX - SYSTEM_RESERVED_MIN + 1; +static constexpr u64 UserSize = USER_MAX - USER_MIN + 1; + +// Required backing file size for mapping physical address space. +static u64 BackingSize = ORBIS_KERNEL_TOTAL_MEM_DEV_PRO; #ifdef _WIN32 @@ -72,68 +98,95 @@ struct MemoryRegion { struct AddressSpace::Impl { Impl() : process{GetCurrentProcess()} { - BackingSize += Config::getExtraDmemInMbytes() * 1_MB; - // Allocate virtual address placeholder for our address space. 
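// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): a standalone mirror of the
// address-space layout described by the constants added above, assuming VAddr
// is a 64-bit integer and using the generic (non-Apple, non-Linux) branch
// values. It only recomputes the region sizes and checks that the three
// ranges are ordered and non-overlapping; names prefixed with k are local
// stand-ins, not emulator identifiers.
// ----------------------------------------------------------------------------
#include <cstdint>
#include <cstdio>

using VAddr = std::uint64_t;

constexpr VAddr kSystemManagedMin = 0x400000ULL;
constexpr VAddr kSystemManagedMax = 0x7FFFFBFFFULL;
constexpr VAddr kSystemReservedMin = 0x7FFFFC000ULL;
constexpr VAddr kSystemReservedMax = 0xFFFFFFFFFULL;
constexpr VAddr kUserMin = 0x1000000000ULL;
constexpr VAddr kUserMax = 0x5FFFFFFFFFFFULL;

// Sizes are inclusive-range lengths, matching the "+ 1" in the patch.
constexpr std::uint64_t kSystemManagedSize = kSystemManagedMax - kSystemManagedMin + 1;
constexpr std::uint64_t kSystemReservedSize = kSystemReservedMax - kSystemReservedMin + 1;
constexpr std::uint64_t kUserSize = kUserMax - kUserMin + 1;

// The ranges must be ordered and must not overlap.
static_assert(kSystemManagedMax < kSystemReservedMin);
static_assert(kSystemReservedMax < kUserMin);

int main() {
    std::printf("system managed: %llu GiB\n",
                static_cast<unsigned long long>(kSystemManagedSize >> 30));
    std::printf("system reserved: %llu GiB\n",
                static_cast<unsigned long long>(kSystemReservedSize >> 30));
    std::printf("user: %llu TiB\n", static_cast<unsigned long long>(kUserSize >> 40));
}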
-        MEM_ADDRESS_REQUIREMENTS req{};
-        MEM_EXTENDED_PARAMETER param{};
-        req.LowestStartingAddress = reinterpret_cast<PVOID>(SYSTEM_MANAGED_MIN);
-        // The ending address must align to page boundary - 1
-        // https://stackoverflow.com/questions/54223343/virtualalloc2-with-memextendedparameteraddressrequirements-always-produces-error
-        req.HighestEndingAddress = reinterpret_cast<PVOID>(USER_MIN + UserSize - 1);
-        req.Alignment = 0;
-        param.Type = MemExtendedParameterAddressRequirements;
-        param.Pointer = &req;
+        // Determine the system's page alignment
+        SYSTEM_INFO sys_info{};
+        GetSystemInfo(&sys_info);
+        u64 alignment = sys_info.dwAllocationGranularity;

-        // Typically, lower parts of system managed area is already reserved in windows.
-        // If reservation fails attempt again by reducing the area size a little bit.
-        // System managed is about 31GB in size so also cap the number of times we can reduce it
-        // to a reasonable amount.
-        static constexpr size_t ReductionOnFail = 1_GB;
-        static constexpr size_t MaxReductions = 10;
+        // Determine the host OS build number
+        // Retrieve module handle for ntdll
+        auto ntdll_handle = GetModuleHandleW(L"ntdll.dll");
+        ASSERT_MSG(ntdll_handle, "Failed to retrieve ntdll handle");

-        size_t virtual_size = SystemManagedSize + SystemReservedSize + UserSize;
-        for (u32 i = 0; i < MaxReductions; i++) {
-            virtual_base = static_cast<u8*>(VirtualAlloc2(process, NULL, virtual_size,
-                                                          MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
-                                                          PAGE_NOACCESS, &param, 1));
-            if (virtual_base) {
-                break;
-            }
-            virtual_size -= ReductionOnFail;
+        // Get the RtlGetVersion function
+        s64(WINAPI * RtlGetVersion)(LPOSVERSIONINFOW);
+        *(FARPROC*)&RtlGetVersion = GetProcAddress(ntdll_handle, "RtlGetVersion");
+        ASSERT_MSG(RtlGetVersion, "failed to retrieve function pointer for RtlGetVersion");
+
+        // Call RtlGetVersion
+        RTL_OSVERSIONINFOW os_version_info{};
+        RtlGetVersion(&os_version_info);
+
+        u64 supported_user_max = USER_MAX;
+        static constexpr s32 Windows11BuildNumber = 22000;
+        if (os_version_info.dwBuildNumber < Windows11BuildNumber) {
+            // Windows 10 has an issue with VirtualAlloc2 on higher addresses.
+            // To prevent regressions, limit the maximum address we reserve for this platform.
+            supported_user_max = 0x11000000000ULL;
+            LOG_WARNING(Core, "Windows 10 detected, reducing user max to {:#x} to avoid problems",
+                        supported_user_max);
         }
-        ASSERT_MSG(virtual_base, "Unable to reserve virtual address space: {}",
-                   Common::GetLastErrorMsg());

+        // Determine the free address ranges we can access.
+        VAddr next_addr = SYSTEM_MANAGED_MIN;
+        MEMORY_BASIC_INFORMATION info{};
+        while (next_addr <= supported_user_max) {
+            ASSERT_MSG(VirtualQuery(reinterpret_cast<void*>(next_addr), &info, sizeof(info)),
+                       "Failed to query memory information for address {:#x}", next_addr);
+
+            // Ensure logic uses values aligned to page boundaries.
+            next_addr = reinterpret_cast<VAddr>(info.BaseAddress) + info.RegionSize;
+            next_addr = Common::AlignUp(next_addr, alignment);
+
+            // Prevent size from going past supported_user_max
+            u64 size = info.RegionSize;
+            if (next_addr > supported_user_max) {
+                size -= (next_addr - supported_user_max);
+            }
+            size = Common::AlignDown(size, alignment);
+
+            // Check for free memory areas
+            // Restrict region size to avoid overly fragmenting the virtual memory space.
+            if (info.State == MEM_FREE && info.RegionSize > 0x1000000) {
+                VAddr addr = Common::AlignUp(reinterpret_cast<VAddr>(info.BaseAddress), alignment);
+                regions.emplace(addr, MemoryRegion{addr, size, false});
+            }
+        }
+
+        // Reserve all detected free regions.
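// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): a self-contained version of the
// VirtualQuery walk used above, assuming only <windows.h>. It collects every
// MEM_FREE hole between `lo` and `hi`, trimmed to allocation-granularity
// boundaries. FreeRegion and CollectFreeRegions are local stand-ins for the
// MemoryRegion/regions names used by the hunk.
// ----------------------------------------------------------------------------
#include <windows.h>
#include <cstdint>
#include <map>

struct FreeRegion {
    std::uint64_t base;
    std::uint64_t size;
};

inline std::map<std::uint64_t, FreeRegion> CollectFreeRegions(std::uint64_t lo, std::uint64_t hi) {
    SYSTEM_INFO sys_info{};
    GetSystemInfo(&sys_info);
    const std::uint64_t align = sys_info.dwAllocationGranularity;

    std::map<std::uint64_t, FreeRegion> regions;
    std::uint64_t addr = lo;
    MEMORY_BASIC_INFORMATION info{};
    // VirtualQuery reports the region containing addr; advancing by RegionSize
    // visits every region in the range exactly once.
    while (addr <= hi && VirtualQuery(reinterpret_cast<LPCVOID>(addr), &info, sizeof(info))) {
        const std::uint64_t region_base = reinterpret_cast<std::uint64_t>(info.BaseAddress);
        const std::uint64_t region_end = region_base + info.RegionSize;
        if (info.State == MEM_FREE) {
            // Keep only the granularity-aligned part of the hole, clamped to hi.
            const std::uint64_t usable_base = (region_base + align - 1) / align * align;
            const std::uint64_t usable_end = (region_end < hi ? region_end : hi) / align * align;
            if (usable_end > usable_base) {
                regions.emplace(usable_base, FreeRegion{usable_base, usable_end - usable_base});
            }
        }
        addr = region_end;
    }
    return regions;
}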
+ for (auto region : regions) { + auto addr = static_cast(VirtualAlloc2( + process, reinterpret_cast(region.second.base), region.second.size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, NULL, 0)); + // All marked regions should reserve fine since they're free. + ASSERT_MSG(addr, "Unable to reserve virtual address space: {}", + Common::GetLastErrorMsg()); + } + + // Set these constants to ensure code relying on them works. + // These do not fully encapsulate the state of the address space. + system_managed_base = reinterpret_cast(regions.begin()->first); + system_managed_size = SystemManagedSize - (regions.begin()->first - SYSTEM_MANAGED_MIN); system_reserved_base = reinterpret_cast(SYSTEM_RESERVED_MIN); system_reserved_size = SystemReservedSize; - system_managed_base = virtual_base; - system_managed_size = system_reserved_base - virtual_base; user_base = reinterpret_cast(USER_MIN); - user_size = virtual_base + virtual_size - user_base; + user_size = supported_user_max - USER_MIN - 1; - LOG_INFO(Kernel_Vmm, "System managed virtual memory region: {} - {}", - fmt::ptr(system_managed_base), - fmt::ptr(system_managed_base + system_managed_size - 1)); - LOG_INFO(Kernel_Vmm, "System reserved virtual memory region: {} - {}", - fmt::ptr(system_reserved_base), - fmt::ptr(system_reserved_base + system_reserved_size - 1)); - LOG_INFO(Kernel_Vmm, "User virtual memory region: {} - {}", fmt::ptr(user_base), - fmt::ptr(user_base + user_size - 1)); - - // Initializer placeholder tracker - const uintptr_t system_managed_addr = reinterpret_cast(system_managed_base); - regions.emplace(system_managed_addr, - MemoryRegion{system_managed_addr, virtual_size, false}); + // Increase BackingSize to account for config options. + BackingSize += Config::getExtraDmemInMbytes() * 1_MB; // Allocate backing file that represents the total physical memory. backing_handle = CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_ALL_ACCESS, PAGE_EXECUTE_READWRITE, SEC_COMMIT, BackingSize, nullptr, nullptr, 0); + ASSERT_MSG(backing_handle, "{}", Common::GetLastErrorMsg()); // Allocate a virtual memory for the backing file map as placeholder backing_base = static_cast(VirtualAlloc2(process, nullptr, BackingSize, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0)); + ASSERT_MSG(backing_base, "{}", Common::GetLastErrorMsg()); + // Map backing placeholder. This will commit the pages void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, BackingSize, @@ -377,6 +430,14 @@ struct AddressSpace::Impl { } } + boost::icl::interval_set GetUsableRegions() { + boost::icl::interval_set reserved_regions; + for (auto region : regions) { + reserved_regions.insert({region.second.base, region.second.base + region.second.size}); + } + return reserved_regions; + } + HANDLE process{}; HANDLE backing_handle{}; u8* backing_base{}; @@ -441,36 +502,33 @@ struct AddressSpace::Impl { user_size = UserSize; constexpr int protection_flags = PROT_READ | PROT_WRITE; - constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + constexpr int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; #if defined(__APPLE__) && defined(ARCH_X86_64) - // On ARM64 Macs under Rosetta 2, we run into limitations due to the commpage from - // 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. 
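// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the Windows placeholder pattern
// the reservation and backing-file code above is built on, reduced to one
// region in the current process. A range is reserved with
// MEM_RESERVE_PLACEHOLDER, then a pagefile-backed view is placed exactly on
// top of it with MEM_REPLACE_PLACEHOLDER. Requires Windows 10 1703+ and
// linking against onecore/mincore for VirtualAlloc2 and MapViewOfFile3.
// ----------------------------------------------------------------------------
#include <windows.h>
#include <cstdint>

// size should be a multiple of the allocation granularity (64 KiB).
inline void* MapBackedView(std::uint64_t size) {
    // Backing mapping in the page file, committed up front.
    HANDLE backing = CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_ALL_ACCESS,
                                        PAGE_READWRITE, SEC_COMMIT, size, nullptr, nullptr, 0);
    if (!backing) {
        return nullptr;
    }

    // Reserve an address-space placeholder; nothing is committed yet.
    void* placeholder =
        VirtualAlloc2(GetCurrentProcess(), nullptr, size,
                      MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0);
    if (!placeholder) {
        CloseHandle(backing);
        return nullptr;
    }

    // Replace the placeholder with a view of the backing mapping at the same address.
    void* view = MapViewOfFile3(backing, GetCurrentProcess(), placeholder, 0, size,
                                MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
    if (!view) {
        VirtualFree(placeholder, 0, MEM_RELEASE);
        CloseHandle(backing);
        return nullptr;
    }
    return view;
}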
- // We can allocate the system managed region, as well as system reserved if reduced in size - // slightly, but we cannot map the user region where we want, so we must let the OS put it - // wherever possible and hope the game won't rely on its location. - system_managed_base = reinterpret_cast( - mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags, - base_map_flags | MAP_FIXED, -1, 0)); - system_reserved_base = reinterpret_cast( - mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), system_reserved_size, - protection_flags, base_map_flags | MAP_FIXED, -1, 0)); - // Cannot guarantee enough space for these areas at the desired addresses, so not MAP_FIXED. - user_base = reinterpret_cast(mmap(reinterpret_cast(USER_MIN), user_size, - protection_flags, base_map_flags, -1, 0)); + // On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF + // and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. Because this creates gaps + // in the available virtual memory region, we map memory space using three distinct parts. + system_managed_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), + system_managed_size, protection_flags, map_flags, -1, 0)); + system_reserved_base = + reinterpret_cast(mmap(reinterpret_cast(SYSTEM_RESERVED_MIN), + system_reserved_size, protection_flags, map_flags, -1, 0)); + user_base = reinterpret_cast( + mmap(reinterpret_cast(USER_MIN), user_size, protection_flags, map_flags, -1, 0)); #else const auto virtual_size = system_managed_size + system_reserved_size + user_size; #if defined(ARCH_X86_64) const auto virtual_base = reinterpret_cast(mmap(reinterpret_cast(SYSTEM_MANAGED_MIN), virtual_size, - protection_flags, base_map_flags | MAP_FIXED, -1, 0)); + protection_flags, map_flags, -1, 0)); system_managed_base = virtual_base; system_reserved_base = reinterpret_cast(SYSTEM_RESERVED_MIN); user_base = reinterpret_cast(USER_MIN); #else // Map memory wherever possible and instruction translation can handle offsetting to the // base. - const auto virtual_base = reinterpret_cast( - mmap(nullptr, virtual_size, protection_flags, base_map_flags, -1, 0)); + const auto virtual_base = + reinterpret_cast(mmap(nullptr, virtual_size, protection_flags, map_flags, -1, 0)); system_managed_base = virtual_base; system_reserved_base = virtual_base + SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN; user_base = virtual_base + USER_MIN - SYSTEM_MANAGED_MIN; @@ -661,4 +719,22 @@ void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission per return impl->Protect(virtual_addr, size, read, write, execute); } +boost::icl::interval_set AddressSpace::GetUsableRegions() { +#ifdef _WIN32 + // On Windows, we need to obtain the accessible intervals from the implementation's regions. 
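// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the fixed-address mmap
// reservation the Unix path above now uses for all three regions, assuming a
// Linux or macOS host. MAP_FIXED silently replaces anything already mapped in
// the range, which is why the linker flags at the top of this patch keep the
// executable image away from these addresses.
// ----------------------------------------------------------------------------
#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

inline void* ReserveFixed(std::uintptr_t base, std::size_t size) {
    void* mem = mmap(reinterpret_cast<void*>(base), size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0);
    if (mem == MAP_FAILED) {
        std::perror("mmap");
        return nullptr;
    }
    return mem;
}

// Example usage: reserve 1 GiB at the generic user-area minimum from the patch.
// int main() {
//     void* user = ReserveFixed(0x1000000000ULL, 1ULL << 30);
//     return user ? 0 : 1;
// }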
+ return impl->GetUsableRegions(); +#else + // On Linux and Mac, the memory space is fully represented by the three major regions + boost::icl::interval_set reserved_regions; + VAddr system_managed_addr = reinterpret_cast(system_managed_base); + VAddr system_reserved_addr = reinterpret_cast(system_reserved_base); + VAddr user_addr = reinterpret_cast(user_base); + + reserved_regions.insert({system_managed_addr, system_managed_addr + system_managed_size}); + reserved_regions.insert({system_reserved_addr, system_reserved_addr + system_reserved_size}); + reserved_regions.insert({user_addr, user_addr + user_size}); + return reserved_regions; +#endif +} + } // namespace Core diff --git a/src/core/address_space.h b/src/core/address_space.h index 85b4c36ac..5c50039bd 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "common/arch.h" #include "common/enum.h" #include "common/types.h" @@ -20,22 +21,6 @@ enum class MemoryPermission : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) -constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; -constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; -constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; -#if defined(__APPLE__) && defined(ARCH_X86_64) -// Can only comfortably reserve the first 0x7C0000000 of system reserved space. -constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL; -#else -constexpr VAddr SYSTEM_RESERVED_MAX = 0xFFFFFFFFFULL; -#endif -constexpr VAddr USER_MIN = 0x1000000000ULL; -constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL; - -static constexpr size_t SystemManagedSize = SYSTEM_MANAGED_MAX - SYSTEM_MANAGED_MIN + 1; -static constexpr size_t SystemReservedSize = SYSTEM_RESERVED_MAX - SYSTEM_RESERVED_MIN + 1; -static constexpr size_t UserSize = 1ULL << 40; - /** * Represents the user virtual address space backed by a dmem memory block */ @@ -100,6 +85,9 @@ public: void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms); + // Returns an interval set containing all usable regions. 
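// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the boost::icl::interval_set
// behaviour GetUsableRegions relies on, as a standalone example assuming only
// Boost.ICL. Touching or overlapping right-open intervals are coalesced, so
// callers (such as the MemoryManager constructor later in this diff) iterate a
// minimal set of usable ranges.
// ----------------------------------------------------------------------------
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <cstdio>

int main() {
    boost::icl::interval_set<std::uint64_t> usable;

    // Two touching ranges merge into one; a distant range stays separate.
    usable.insert(boost::icl::interval<std::uint64_t>::right_open(0x400000ULL, 0x7FFFFC000ULL));
    usable.insert(
        boost::icl::interval<std::uint64_t>::right_open(0x7FFFFC000ULL, 0x1000000000ULL));
    usable.insert(
        boost::icl::interval<std::uint64_t>::right_open(0x10000000000ULL, 0x20000000000ULL));

    for (const auto& iv : usable) {
        std::printf("%#llx - %#llx\n", static_cast<unsigned long long>(iv.lower()),
                    static_cast<unsigned long long>(iv.upper()));
    }
    // Prints two intervals: [0x400000, 0x1000000000) and [0x10000000000, 0x20000000000).
}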
+ boost::icl::interval_set GetUsableRegions(); + private: struct Impl; std::unique_ptr impl; diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 3014a410a..8702be099 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -17,6 +17,7 @@ #include "core/libraries/kernel/process.h" #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" +#include "core/memory.h" #include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -70,7 +71,9 @@ static bool send_init_packet{true}; // initialize HW state before first game's s static int sdk_version{0}; static u32 asc_next_offs_dw[Liverpool::NumComputeRings]; -static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF; + +// This address is initialized in sceGnmGetTheTessellationFactorRingBufferBaseAddress +static VAddr tessellation_factors_ring_addr = -1; static constexpr u32 tessellation_offchip_buffer_size = 0x800000u; static void ResetSubmissionLock(Platform::InterruptId irq) { @@ -997,6 +1000,13 @@ int PS4_SYSV_ABI sceGnmGetShaderStatus() { VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() { LOG_TRACE(Lib_GnmDriver, "called"); + if (tessellation_factors_ring_addr == -1) { + auto* memory = Core::Memory::Instance(); + auto& address_space = memory->GetAddressSpace(); + tessellation_factors_ring_addr = address_space.SystemReservedVirtualBase() + + address_space.SystemReservedVirtualSize() - 0xFFFFFFF; + } + return tessellation_factors_ring_addr; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 8e39a5bf3..f2dd52e35 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -267,7 +267,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt new_thread->cancel_async = false; auto* memory = Core::Memory::Instance(); - if (name && memory->IsValidAddress(name)) { + if (name && memory->IsValidMapping(reinterpret_cast(name))) { new_thread->name = name; } else { new_thread->name = fmt::format("Thread{}", new_thread->tid.load()); diff --git a/src/core/libraries/kernel/threads/thread_state.cpp b/src/core/libraries/kernel/threads/thread_state.cpp index 614955b97..a8ad0e322 100644 --- a/src/core/libraries/kernel/threads/thread_state.cpp +++ b/src/core/libraries/kernel/threads/thread_state.cpp @@ -21,9 +21,10 @@ void TcbDtor(Core::Tcb* oldtls); ThreadState::ThreadState() { // Reserve memory for maximum amount of threads allowed. auto* memory = Core::Memory::Instance(); + auto& impl = memory->GetAddressSpace(); static constexpr u32 ThrHeapSize = Common::AlignUp(sizeof(Pthread) * MaxThreads, 16_KB); void* heap_addr{}; - const int ret = memory->MapMemory(&heap_addr, Core::SYSTEM_RESERVED_MIN, ThrHeapSize, + const int ret = memory->MapMemory(&heap_addr, impl.SystemReservedVirtualBase(), ThrHeapSize, Core::MemoryProt::CpuReadWrite, Core::MemoryMapFlags::NoFlags, Core::VMAType::File, "ThrHeap"); ASSERT_MSG(ret == 0, "Unable to allocate thread heap memory {}", ret); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index cfe5ffb26..cac0fbc63 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -15,22 +15,16 @@ namespace Core { MemoryManager::MemoryManager() { - // Insert a virtual memory area that covers the entire area we manage. 
- const VAddr system_managed_base = impl.SystemManagedVirtualBase(); - const u64 system_managed_size = impl.SystemManagedVirtualSize(); - const VAddr system_reserved_base = impl.SystemReservedVirtualBase(); - const u64 system_reserved_size = impl.SystemReservedVirtualSize(); - const VAddr user_base = impl.UserVirtualBase(); - const u64 user_size = impl.UserVirtualSize(); - vma_map.emplace(system_managed_base, - VirtualMemoryArea{system_managed_base, system_managed_size}); - vma_map.emplace(system_reserved_base, - VirtualMemoryArea{system_reserved_base, system_reserved_size}); - vma_map.emplace(user_base, VirtualMemoryArea{user_base, user_size}); + LOG_INFO(Kernel_Vmm, "Virtual memory space initialized with regions:"); - // Log initialization. - LOG_INFO(Kernel_Vmm, "Usable memory address space: {}_GB", - (system_managed_size + system_reserved_size + user_size) >> 30); + // Construct vma_map using the regions reserved by the address space + auto regions = impl.GetUsableRegions(); + u64 total_usable_space = 0; + for (auto region : regions) { + vma_map.emplace(region.lower(), + VirtualMemoryArea{region.lower(), region.upper() - region.lower()}); + LOG_INFO(Kernel_Vmm, "{:#x} - {:#x}", region.lower(), region.upper()); + } } MemoryManager::~MemoryManager() = default; @@ -81,8 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { return size; } - ASSERT_MSG(IsValidAddress(reinterpret_cast(virtual_addr)), - "Attempted to access invalid address {:#x}", virtual_addr); + ASSERT_MSG(IsValidMapping(virtual_addr), "Attempted to access invalid address {:#x}", + virtual_addr); // Clamp size to the remaining size of the current VMA. auto vma = FindVMA(virtual_addr); @@ -119,8 +113,8 @@ void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) { } void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(virtual_addr)), - "Attempted to access invalid address {:#x}", virtual_addr); + ASSERT_MSG(IsValidMapping(virtual_addr), "Attempted to access invalid address {:#x}", + virtual_addr); auto vma = FindVMA(virtual_addr); while (size) { @@ -138,9 +132,9 @@ void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) { } bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { - ASSERT_MSG(IsValidAddress(address), "Attempted to access invalid address {}", - fmt::ptr(address)); const VAddr virtual_addr = std::bit_cast(address); + ASSERT_MSG(IsValidMapping(virtual_addr, num_bytes), "Attempted to access invalid address {:#x}", + virtual_addr); const auto& vma = FindVMA(virtual_addr)->second; if (!HasPhysicalBacking(vma)) { return false; @@ -283,8 +277,8 @@ void MemoryManager::Free(PAddr phys_addr, u64 size) { } s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 mtype) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(virtual_addr)), - "Attempted to access invalid address {:#x}", virtual_addr); + ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}", + virtual_addr); std::scoped_lock lk{mutex}; // Input addresses to PoolCommit are treated as fixed, and have a constant alignment. @@ -435,8 +429,8 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo // Fixed mapping means the virtual address must exactly match the provided one. // On a PS4, the Fixed flag is ignored if address 0 is provided. 
if (True(flags & MemoryMapFlags::Fixed) && virtual_addr != 0) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(mapped_addr)), - "Attempted to access invalid address {:#x}", mapped_addr); + ASSERT_MSG(IsValidMapping(mapped_addr, size), "Attempted to access invalid address {:#x}", + mapped_addr); auto vma = FindVMA(mapped_addr)->second; // There's a possible edge case where we're mapping to a partially reserved range. // To account for this, unmap any reserved areas within this mapping range first. @@ -538,15 +532,14 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot, MemoryMapFlags flags, s32 fd, s64 phys_addr) { VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; - ASSERT_MSG(IsValidAddress(reinterpret_cast(mapped_addr)), - "Attempted to access invalid address {:#x}", mapped_addr); + ASSERT_MSG(IsValidMapping(mapped_addr, size), "Attempted to access invalid address {:#x}", + mapped_addr); std::scoped_lock lk{mutex}; - const u64 size_aligned = Common::AlignUp(size, 16_KB); // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { - mapped_addr = SearchFree(mapped_addr, size_aligned, 1); + mapped_addr = SearchFree(mapped_addr, size, 1); if (mapped_addr == -1) { // No suitable memory areas to map to return ORBIS_KERNEL_ERROR_ENOMEM; @@ -575,14 +568,14 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory const auto handle = file->f.GetFileMapping(); - impl.MapFile(mapped_addr, size_aligned, phys_addr, std::bit_cast(prot), handle); + impl.MapFile(mapped_addr, size, phys_addr, std::bit_cast(prot), handle); if (prot >= MemoryProt::GpuRead) { ASSERT_MSG(false, "Files cannot be mapped to GPU memory"); } // Add virtual memory area - auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; + auto& new_vma = CarveVMA(mapped_addr, size)->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = prot; new_vma.name = "File"; @@ -594,8 +587,8 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory } s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(virtual_addr)), - "Attempted to access invalid address {:#x}", virtual_addr); + ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}", + virtual_addr); std::scoped_lock lk{mutex}; const auto it = FindVMA(virtual_addr); @@ -656,6 +649,13 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) { s32 MemoryManager::UnmapMemory(VAddr virtual_addr, u64 size) { std::scoped_lock lk{mutex}; + if (size == 0) { + return ORBIS_OK; + } + virtual_addr = Common::AlignDown(virtual_addr, 16_KB); + size = Common::AlignUp(size, 16_KB); + ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}", + virtual_addr); return UnmapMemoryImpl(virtual_addr, size); } @@ -741,11 +741,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { u64 unmapped_bytes = 0; - virtual_addr = Common::AlignDown(virtual_addr, 16_KB); - size = Common::AlignUp(size, 16_KB); do { - ASSERT_MSG(IsValidAddress(reinterpret_cast(virtual_addr)), - "Attempted to access invalid address {:#x}", virtual_addr); auto it = FindVMA(virtual_addr + unmapped_bytes); auto& vma_base = it->second; auto unmapped = @@ 
-758,8 +754,7 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { } s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(addr)), - "Attempted to access invalid address {:#x}", addr); + ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr); std::scoped_lock lk{mutex}; const auto it = FindVMA(addr); @@ -836,6 +831,14 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) { std::scoped_lock lk{mutex}; + // If size is zero, then there's nothing to protect + if (size == 0) { + return ORBIS_OK; + } + + // Ensure the range to modify is valid + ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr); + // Validate protection flags constexpr static MemoryProt valid_flags = MemoryProt::NoAccess | MemoryProt::CpuRead | MemoryProt::CpuWrite | MemoryProt::CpuExec | @@ -850,8 +853,6 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) { // Protect all VMAs between addr and addr + size. s64 protected_bytes = 0; while (protected_bytes < size) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(addr)), - "Attempted to access invalid address {:#x}", addr); auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; auto result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); @@ -995,6 +996,8 @@ s32 MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, u6 s32 MemoryManager::SetDirectMemoryType(VAddr addr, u64 size, s32 memory_type) { std::scoped_lock lk{mutex}; + ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr); + // Search through all VMAs covered by the provided range. // We aren't modifying these VMAs, so it's safe to iterate through them. auto remaining_size = size; @@ -1044,7 +1047,7 @@ void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_v // Addresses are aligned down to the nearest 16_KB auto aligned_addr = Common::AlignDown(virtual_addr, 16_KB); - ASSERT_MSG(IsValidAddress(reinterpret_cast(aligned_addr)), + ASSERT_MSG(IsValidMapping(aligned_addr, aligned_size), "Attempted to access invalid address {:#x}", aligned_addr); auto it = FindVMA(aligned_addr); s64 remaining_size = aligned_size; @@ -1086,8 +1089,7 @@ s32 MemoryManager::GetDirectMemoryType(PAddr addr, s32* directMemoryTypeOut, } s32 MemoryManager::IsStack(VAddr addr, void** start, void** end) { - ASSERT_MSG(IsValidAddress(reinterpret_cast(addr)), - "Attempted to access invalid address {:#x}", addr); + ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr); const auto& vma = FindVMA(addr)->second; if (vma.IsFree()) { return ORBIS_KERNEL_ERROR_EACCES; @@ -1141,16 +1143,17 @@ void MemoryManager::InvalidateMemory(const VAddr addr, const u64 size) const { } VAddr MemoryManager::SearchFree(VAddr virtual_addr, u64 size, u32 alignment) { - // If the requested address is below the mapped range, start search from the lowest address + // Calculate the minimum and maximum addresses present in our address space. 
auto min_search_address = impl.SystemManagedVirtualBase(); + auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); + + // If the requested address is below the mapped range, start search from the lowest address if (virtual_addr < min_search_address) { virtual_addr = min_search_address; } // If the requested address is beyond the maximum our code can handle, throw an assert - auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); - ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", - virtual_addr); + ASSERT_MSG(IsValidMapping(virtual_addr), "Input address {:#x} is out of bounds", virtual_addr); // Align up the virtual_addr first. virtual_addr = Common::AlignUp(virtual_addr, alignment); @@ -1161,6 +1164,9 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, u64 size, u32 alignment) { return virtual_addr; } + // If we didn't hit the return above, then we know the current VMA isn't suitable + it++; + // Search for the first free VMA that fits our mapping. while (it != vma_map.end()) { if (!it->second.IsFree()) { diff --git a/src/core/memory.h b/src/core/memory.h index 9916b1a27..fcea0fbfc 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -200,11 +200,40 @@ public: return virtual_addr + size < max_gpu_address; } - bool IsValidAddress(const void* addr) const noexcept { - const VAddr virtual_addr = reinterpret_cast(addr); + bool IsValidMapping(const VAddr virtual_addr, const u64 size = 0) { const auto end_it = std::prev(vma_map.end()); const VAddr end_addr = end_it->first + end_it->second.size; - return virtual_addr >= vma_map.begin()->first && virtual_addr < end_addr; + + // If the address fails boundary checks, return early. + if (virtual_addr < vma_map.begin()->first || virtual_addr >= end_addr) { + return false; + } + + // If size is zero and boundary checks succeed, then skip more robust checking + if (size == 0) { + return true; + } + + // Now make sure the full address range is contained in vma_map. + auto vma_handle = FindVMA(virtual_addr); + auto addr_to_check = virtual_addr; + s64 size_to_validate = size; + while (vma_handle != vma_map.end() && size_to_validate > 0) { + const auto offset_in_vma = addr_to_check - vma_handle->second.base; + const auto size_in_vma = vma_handle->second.size - offset_in_vma; + size_to_validate -= size_in_vma; + addr_to_check += size_in_vma; + vma_handle++; + + // Make sure there isn't any gap here + if (size_to_validate > 0 && vma_handle != vma_map.end() && + addr_to_check != vma_handle->second.base) { + return false; + } + } + + // If we reach this point and size to validate is not positive, then this mapping is valid. + return size_to_validate <= 0; } u64 ClampRangeSize(VAddr virtual_addr, u64 size); @@ -301,7 +330,7 @@ private: vma.type == VMAType::Pooled; } - VAddr SearchFree(VAddr virtual_addr, u64 size, u32 alignment = 0); + VAddr SearchFree(VAddr virtual_addr, u64 size, u32 alignment); VMAHandle CarveVMA(VAddr virtual_addr, u64 size);
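// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the range-coverage walk that
// IsValidMapping performs above, written against a plain std::map keyed by
// region base address, as vma_map is. Region, FindRegion and CoversRange are
// hypothetical stand-ins for VirtualMemoryArea, FindVMA and IsValidMapping.
// The walk succeeds only if [addr, addr + size) is covered by consecutive
// regions with no gaps.
// ----------------------------------------------------------------------------
#include <cstdint>
#include <iterator>
#include <map>

struct Region {
    std::uint64_t base;
    std::uint64_t size;
};

// Returns the region containing addr, i.e. the last region whose base is <= addr.
inline std::map<std::uint64_t, Region>::const_iterator FindRegion(
    const std::map<std::uint64_t, Region>& map, std::uint64_t addr) {
    auto it = map.upper_bound(addr);
    return it == map.begin() ? map.end() : std::prev(it);
}

inline bool CoversRange(const std::map<std::uint64_t, Region>& map, std::uint64_t addr,
                        std::uint64_t size) {
    auto it = FindRegion(map, addr);
    if (it == map.end() || addr >= it->second.base + it->second.size) {
        return false;
    }
    std::uint64_t remaining = size;
    std::uint64_t cursor = addr;
    while (remaining > 0) {
        // Consume whatever the current region still covers past the cursor.
        const std::uint64_t covered = it->second.base + it->second.size - cursor;
        if (covered >= remaining) {
            return true;
        }
        remaining -= covered;
        cursor += covered;
        ++it;
        // A gap, or running off the end of the map, invalidates the range.
        if (it == map.end() || it->second.base != cursor) {
            return false;
        }
    }
    return true;
}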