diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp
index 846bb5eb4..1140f6720 100644
--- a/src/core/address_space.cpp
+++ b/src/core/address_space.cpp
@@ -27,7 +27,7 @@ asm(".zerofill SYSTEM_RESERVED,SYSTEM_RESERVED,__SYSTEM_RESERVED,0x7C0004000");
 namespace Core {
-static constexpr size_t BackingSize = SCE_KERNEL_TOTAL_MEM_PRO;
+static constexpr size_t BackingSize = ORBIS_KERNEL_TOTAL_MEM_DEV_PRO;
 #ifdef _WIN32
@@ -579,9 +579,9 @@ void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VA
     // the entire allocation and remap the portions outside of the requested unmapping range.
     impl->Unmap(virtual_addr, size, has_backing && !readonly_file);
-    // TODO: Determine if any titles require partial unmapping support for flexible allocations.
+    // TODO: Determine if any titles require partial unmapping support for un-backed allocations.
     ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size),
-               "Partial unmapping of flexible allocations is not supported");
+               "Partial unmapping of un-backed allocations is not supported");
     if (start_in_vma != 0) {
         Map(virtual_addr, start_in_vma, 0, phys_base, is_exec);
diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 1b3ced76d..325ce2d83 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -326,7 +326,7 @@ u32 PS4_SYSV_ABI sceKernelIsAddressSanitizerEnabled() {
 s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, s32 numEntries,
                                    s32* numEntriesOut) {
     return sceKernelBatchMap2(entries, numEntries, numEntriesOut,
-                              MemoryFlags::SCE_KERNEL_MAP_FIXED); // 0x10, 0x410?
+                              MemoryFlags::ORBIS_KERNEL_MAP_FIXED); // 0x10, 0x410?
 }
 s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, s32 numEntries,
diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h
index ce4ec64fe..c60c7992d 100644
--- a/src/core/libraries/kernel/memory.h
+++ b/src/core/libraries/kernel/memory.h
@@ -6,11 +6,14 @@
 #include "common/bit_field.h"
 #include "common/types.h"
-constexpr u64 SCE_KERNEL_TOTAL_MEM = 5248_MB;
-constexpr u64 SCE_KERNEL_TOTAL_MEM_PRO = 5888_MB;
+constexpr u64 ORBIS_KERNEL_TOTAL_MEM = 5248_MB;
+constexpr u64 ORBIS_KERNEL_TOTAL_MEM_PRO = 5888_MB;
+constexpr u64 ORBIS_KERNEL_TOTAL_MEM_DEV = 6656_MB;
+// TODO: This value needs confirmation
+constexpr u64 ORBIS_KERNEL_TOTAL_MEM_DEV_PRO = 7936_MB;
-constexpr u64 SCE_FLEXIBLE_MEMORY_BASE = 64_MB;
-constexpr u64 SCE_FLEXIBLE_MEMORY_SIZE = 512_MB;
+constexpr u64 ORBIS_FLEXIBLE_MEMORY_BASE = 64_MB;
+constexpr u64 ORBIS_FLEXIBLE_MEMORY_SIZE = 512_MB;
 namespace Core::Loader {
 class SymbolsResolver;
@@ -19,24 +22,24 @@ class SymbolsResolver;
 namespace Libraries::Kernel {
 enum MemoryTypes : u32 {
-    SCE_KERNEL_WB_ONION = 0,  // write - back mode (Onion bus)
-    SCE_KERNEL_WC_GARLIC = 3, // write - combining mode (Garlic bus)
-    SCE_KERNEL_WB_GARLIC = 10 // write - back mode (Garlic bus)
+    ORBIS_KERNEL_WB_ONION = 0,  // write - back mode (Onion bus)
+    ORBIS_KERNEL_WC_GARLIC = 3, // write - combining mode (Garlic bus)
+    ORBIS_KERNEL_WB_GARLIC = 10 // write - back mode (Garlic bus)
 };
 enum MemoryFlags : u32 {
-    SCE_KERNEL_MAP_FIXED = 0x0010, // Fixed
-    SCE_KERNEL_MAP_NO_OVERWRITE = 0x0080,
-    SCE_KERNEL_MAP_NO_COALESCE = 0x400000
+    ORBIS_KERNEL_MAP_FIXED = 0x0010, // Fixed
+    ORBIS_KERNEL_MAP_NO_OVERWRITE = 0x0080,
+    ORBIS_KERNEL_MAP_NO_COALESCE = 0x400000
 };
 enum MemoryProtection : u32 {
-    SCE_KERNEL_PROT_CPU_READ = 0x01,  // Permit reads from the CPU
-    SCE_KERNEL_PROT_CPU_RW = 0x02,    // Permit reads/writes from the CPU
-    SCE_KERNEL_PROT_CPU_WRITE = 0x02, // Permit reads/writes from the CPU (same)
-    SCE_KERNEL_PROT_GPU_READ = 0x10,  // Permit reads from the GPU
-    SCE_KERNEL_PROT_GPU_WRITE = 0x20, // Permit writes from the GPU
-    SCE_KERNEL_PROT_GPU_RW = 0x30     // Permit reads/writes from the GPU
+    ORBIS_KERNEL_PROT_CPU_READ = 0x01,  // Permit reads from the CPU
+    ORBIS_KERNEL_PROT_CPU_RW = 0x02,    // Permit reads/writes from the CPU
+    ORBIS_KERNEL_PROT_CPU_WRITE = 0x02, // Permit reads/writes from the CPU (same)
+    ORBIS_KERNEL_PROT_GPU_READ = 0x10,  // Permit reads from the GPU
+    ORBIS_KERNEL_PROT_GPU_WRITE = 0x20, // Permit writes from the GPU
+    ORBIS_KERNEL_PROT_GPU_RW = 0x30     // Permit reads/writes from the GPU
 };
 enum MemoryOpTypes : u32 {
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index ad604b3f0..9dcb5c2f2 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -70,7 +70,7 @@ void Linker::Execute(const std::vector<std::string> args) {
     }
     // Configure the direct and flexible memory regions.
-    u64 fmem_size = SCE_FLEXIBLE_MEMORY_SIZE;
+    u64 fmem_size = ORBIS_FLEXIBLE_MEMORY_SIZE;
     bool use_extended_mem1 = true, use_extended_mem2 = true;
     const auto* proc_param = GetProcParam();
@@ -83,7 +83,7 @@ void Linker::Execute(const std::vector<std::string> args) {
         if (mem_param.size >=
             offsetof(OrbisKernelMemParam, flexible_memory_size) + sizeof(u64*)) {
             if (const auto* flexible_size = mem_param.flexible_memory_size) {
-                fmem_size = *flexible_size + SCE_FLEXIBLE_MEMORY_BASE;
+                fmem_size = *flexible_size + ORBIS_FLEXIBLE_MEMORY_BASE;
             }
         }
     }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 55c266d70..eb84e5690 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -38,16 +38,9 @@ MemoryManager::~MemoryManager() = default;
 void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1,
                                        bool use_extended_mem2) {
     const bool is_neo = ::Libraries::Kernel::sceKernelIsNeoMode();
-    auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM;
+    auto total_size = is_neo ? ORBIS_KERNEL_TOTAL_MEM_PRO : ORBIS_KERNEL_TOTAL_MEM;
     if (Config::isDevKitConsole()) {
-        const auto old_size = total_size;
-        // Assuming 2gb is neo for now, will need to link it with sceKernelIsDevKit
-        total_size += is_neo ? 2_GB : 768_MB;
-        LOG_WARNING(Kernel_Vmm,
-                    "Config::isDevKitConsole is enabled! Added additional {:s} of direct memory.",
-                    is_neo ? "2 GB" : "768 MB");
-        LOG_WARNING(Kernel_Vmm, "Old Direct Size: {:#x} -> New Direct Size: {:#x}", old_size,
-                    total_size);
+        total_size = is_neo ? ORBIS_KERNEL_TOTAL_MEM_DEV_PRO : ORBIS_KERNEL_TOTAL_MEM_DEV;
     }
     if (!use_extended_mem1 && is_neo) {
         total_size -= 256_MB;
@@ -55,14 +48,21 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1
     if (!use_extended_mem2 && !is_neo) {
         total_size -= 128_MB;
     }
-    total_flexible_size = flexible_size - SCE_FLEXIBLE_MEMORY_BASE;
+    total_flexible_size = flexible_size - ORBIS_FLEXIBLE_MEMORY_BASE;
     total_direct_size = total_size - flexible_size;
-    // Insert an area that covers direct memory physical block.
+    // Insert an area that covers the direct memory physical address block.
     // Note that this should never be called after direct memory allocations have been made.
     dmem_map.clear();
     dmem_map.emplace(0, DirectMemoryArea{0, total_direct_size});
+    // Insert an area that covers the flexible memory physical address block.
+    // Note that this should never be called after flexible memory allocations have been made.
+    const auto remaining_physical_space = ORBIS_KERNEL_TOTAL_MEM_DEV_PRO - total_direct_size;
+    fmem_map.clear();
+    fmem_map.emplace(total_direct_size,
+                     FlexibleMemoryArea{total_direct_size, remaining_physical_space});
+
     LOG_INFO(Kernel_Vmm, "Configured memory regions: flexible size = {:#x}, direct size = {:#x}",
              total_flexible_size, total_direct_size);
 }
@@ -135,7 +135,7 @@ bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_byt
                fmt::ptr(address));
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
     const auto& vma = FindVMA(virtual_addr)->second;
-    if (vma.type != VMAType::Direct) {
+    if (!HasPhysicalBacking(vma)) {
         return false;
     }
     u8* backing = impl.BackingBase() + vma.phys_base + (virtual_addr - vma.base);
@@ -293,10 +293,26 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot) {
     new_vma.name = "anon";
     new_vma.type = Core::VMAType::Pooled;
     new_vma.is_exec = false;
-    new_vma.phys_base = 0;
+
+    // Find a suitable physical address
+    auto handle = dmem_map.begin();
+    while (handle != dmem_map.end() && (!handle->second.is_pooled || handle->second.size < size)) {
+        handle++;
+    }
+
+    ASSERT_MSG(handle != dmem_map.end() && handle->second.is_pooled, "Out of pooled memory");
+
+    // Use the start of this area as the physical backing for this mapping.
+    const auto new_dmem_handle = CarveDmemArea(handle->second.base, size);
+    auto& new_dmem_area = new_dmem_handle->second;
+    new_dmem_area.is_free = false;
+    new_dmem_area.is_pooled = false;
+    new_dmem_area.is_committed = true;
+    new_vma.phys_base = new_dmem_area.base;
+    MergeAdjacent(dmem_map, new_dmem_handle);
     // Perform the mapping
-    void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
+    void* out_addr = impl.Map(mapped_addr, size, alignment, new_vma.phys_base, false);
     TRACK_ALLOC(out_addr, size, "VMEM");
     if (IsValidGpuMapping(mapped_addr, size)) {
@@ -390,8 +406,27 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
     auto& new_vma = new_vma_handle->second;
     // If type is Flexible, we need to track how much flexible memory is used here.
+    // We also need to determine a reasonable physical base to perform this mapping at.
     if (type == VMAType::Flexible) {
         flexible_usage += size;
+
+        // Find a suitable physical address
+        auto handle = fmem_map.begin();
+        while (handle != fmem_map.end() &&
+               (!handle->second.is_free || handle->second.size < size)) {
+            handle++;
+        }
+
+        // Some games will end up fragmenting the flexible address space.
+        ASSERT_MSG(handle != fmem_map.end() && handle->second.is_free,
+                   "No suitable physical memory areas to map");
+
+        // We'll use the start of this area as the physical backing for this mapping.
+        const auto new_fmem_handle = CarveFmemArea(handle->second.base, size);
+        auto& new_fmem_area = new_fmem_handle->second;
+        new_fmem_area.is_free = false;
+        phys_addr = new_fmem_area.base;
+        MergeAdjacent(fmem_map, new_fmem_handle);
     }
     new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
@@ -506,9 +541,18 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
         // Track how much pooled memory is decommitted
         pool_budget += size;
+
+        // Re-pool the direct memory used by this mapping
+        const auto unmap_phys_base = phys_base + start_in_vma;
+        const auto new_dmem_handle = CarveDmemArea(unmap_phys_base, size);
+        auto& new_dmem_area = new_dmem_handle->second;
+        new_dmem_area.is_free = false;
+        new_dmem_area.is_pooled = true;
+        new_dmem_area.is_committed = false;
+        MergeAdjacent(dmem_map, new_dmem_handle);
     }
-    // Mark region as free and attempt to coalesce it with neighbours.
+    // Mark region as pool reserved and attempt to coalesce it with neighbours.
     const auto new_it = CarveVMA(virtual_addr, size);
     auto& vma = new_it->second;
     vma.type = VMAType::PoolReserved;
@@ -521,7 +565,7 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
     if (type != VMAType::PoolReserved) {
         // Unmap the memory region.
         impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base,
-                   is_exec, false, false);
+                   is_exec, true, false);
         TRACK_FREE(virtual_addr, "VMEM");
     }
@@ -542,7 +586,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
     const auto start_in_vma = virtual_addr - vma_base_addr;
     const auto adjusted_size =
         vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
-    const bool has_backing = type == VMAType::Direct || type == VMAType::File;
+    const bool has_backing = HasPhysicalBacking(vma_base) || type == VMAType::File;
     const auto prot = vma_base.prot;
     const bool readonly_file = prot == MemoryProt::CpuRead && type == VMAType::File;
@@ -552,6 +596,19 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
     if (type == VMAType::Flexible) {
         flexible_usage -= adjusted_size;
+
+        // Now that flexible memory has a physical backing, manually erase the
+        // contents before unmapping to prevent possible issues with stale data.
+        const auto unmap_hardware_address = impl.BackingBase() + phys_base + start_in_vma;
+        std::memset(unmap_hardware_address, 0, adjusted_size);
+
+        // Address space unmap needs the physical_base from the start of the vma,
+        // so calculate the phys_base to unmap from here.
+        const auto unmap_phys_base = phys_base + start_in_vma;
+        const auto new_fmem_handle = CarveFmemArea(unmap_phys_base, adjusted_size);
+        auto& new_fmem_area = new_fmem_handle->second;
+        new_fmem_area.is_free = true;
+        MergeAdjacent(fmem_map, new_fmem_handle);
     }
     // Mark region as free and attempt to coalesce it with neighbours.
@@ -721,7 +778,7 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
     const auto& vma = it->second;
     info->start = vma.base;
     info->end = vma.base + vma.size;
-    info->offset = vma.phys_base;
+    info->offset = vma.type == VMAType::Flexible ? 0 : vma.phys_base;
     info->protection = static_cast<s32>(vma.prot);
     info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0;
     info->is_direct = vma.type == VMAType::Direct ? 1 : 0;
@@ -736,7 +793,7 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
         ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!");
         info->memory_type = dmem_it->second.memory_type;
     } else {
-        info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION;
+        info->memory_type = ::Libraries::Kernel::ORBIS_KERNEL_WB_ONION;
     }
     return ORBIS_OK;
@@ -840,7 +897,6 @@ void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_v
     if (remaining_size < it->second.size) {
         // We should split VMAs here, but this could cause trouble for Windows.
         // Instead log a warning and name the whole VMA.
-        // it = CarveVMA(current_addr, remaining_size);
         LOG_WARNING(Kernel_Vmm, "Trying to partially name a range");
     }
     auto& vma = it->second;
@@ -1012,6 +1068,30 @@ MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, u64 size) {
     return dmem_handle;
 }
+MemoryManager::FMemHandle MemoryManager::CarveFmemArea(PAddr addr, u64 size) {
+    auto fmem_handle = FindFmemArea(addr);
+    ASSERT_MSG(addr <= fmem_handle->second.GetEnd(), "Physical address not in fmem_map");
+
+    const FlexibleMemoryArea& area = fmem_handle->second;
+    ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
+
+    const PAddr start_in_area = addr - area.base;
+    const PAddr end_in_vma = start_in_area + size;
+    ASSERT_MSG(end_in_vma <= area.size, "Mapping cannot fit inside free region: size = {:#x}",
+               size);
+
+    if (end_in_vma != area.size) {
+        // Split VMA at the end of the allocated region
+        Split(fmem_handle, end_in_vma);
+    }
+    if (start_in_area != 0) {
+        // Split VMA at the start of the allocated region
+        fmem_handle = Split(fmem_handle, start_in_area);
+    }
+
+    return fmem_handle;
+}
+
 MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u64 offset_in_vma) {
     auto& old_vma = vma_handle->second;
     ASSERT(offset_in_vma < old_vma.size && offset_in_vma > 0);
@@ -1021,7 +1101,7 @@ MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u64 offset_i
     new_vma.base += offset_in_vma;
     new_vma.size -= offset_in_vma;
-    if (new_vma.type == VMAType::Direct) {
+    if (HasPhysicalBacking(new_vma)) {
         new_vma.phys_base += offset_in_vma;
     }
     return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
@@ -1039,4 +1119,16 @@ MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, u64 offse
     return dmem_map.emplace_hint(std::next(dmem_handle), new_area.base, new_area);
 }
+MemoryManager::FMemHandle MemoryManager::Split(FMemHandle fmem_handle, u64 offset_in_area) {
+    auto& old_area = fmem_handle->second;
+    ASSERT(offset_in_area < old_area.size && offset_in_area > 0);
+
+    auto new_area = old_area;
+    old_area.size = offset_in_area;
+    new_area.base += offset_in_area;
+    new_area.size -= offset_in_area;
+
+    return fmem_map.emplace_hint(std::next(fmem_handle), new_area.base, new_area);
+}
+
 } // namespace Core
diff --git a/src/core/memory.h b/src/core/memory.h
index 285d7dbed..1fbfc3c4b 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -67,6 +67,7 @@ struct DirectMemoryArea {
     u64 size = 0;
     s32 memory_type = 0;
     bool is_pooled = false;
+    bool is_committed = false;
     bool is_free = true;
     PAddr GetEnd() const {
@@ -80,6 +81,27 @@ struct DirectMemoryArea {
         if (memory_type != next.memory_type) {
             return false;
         }
+        if (is_free != next.is_free || is_pooled != next.is_pooled ||
+            is_committed != next.is_committed) {
+            return false;
+        }
+        return true;
+    }
+};
+
+struct FlexibleMemoryArea {
+    PAddr base = 0;
+    u64 size = 0;
+    bool is_free = true;
+
+    PAddr GetEnd() const {
+        return base + size;
+    }
+
+    bool CanMergeWith(const FlexibleMemoryArea& next) const {
+        if (base + size != next.base) {
+            return false;
+        }
         if (is_free != next.is_free) {
             return false;
         }
@@ -117,7 +139,8 @@ struct VirtualMemoryArea {
         if (base + size != next.base) {
             return false;
         }
-        if (type == VMAType::Direct && phys_base + size != next.phys_base) {
+        if ((type == VMAType::Direct || type == VMAType::Flexible || type == VMAType::Pooled) &&
+            phys_base + size != next.phys_base) {
             return false;
         }
         if (prot != next.prot || type != next.type) {
@@ -131,6 +154,9 @@ class MemoryManager {
     using DMemMap = std::map<PAddr, DirectMemoryArea>;
     using DMemHandle = DMemMap::iterator;
+    using FMemMap = std::map<PAddr, FlexibleMemoryArea>;
+    using FMemHandle = FMemMap::iterator;
+
     using VMAMap = std::map<VAddr, VirtualMemoryArea>;
     using VMAHandle = VMAMap::iterator;
@@ -238,6 +264,10 @@ private:
         return std::prev(dmem_map.upper_bound(target));
     }
+    FMemHandle FindFmemArea(PAddr target) {
+        return std::prev(fmem_map.upper_bound(target));
+    }
+
     template <typename Handle>
     Handle MergeAdjacent(auto& handle_map, Handle iter) {
         const auto next_vma = std::next(iter);
@@ -258,16 +288,25 @@ private:
         return iter;
     }
+    bool HasPhysicalBacking(VirtualMemoryArea vma) {
+        return vma.type == VMAType::Direct || vma.type == VMAType::Flexible ||
+               vma.type == VMAType::Pooled;
+    }
+
     VAddr SearchFree(VAddr virtual_addr, u64 size, u32 alignment = 0);
     VMAHandle CarveVMA(VAddr virtual_addr, u64 size);
     DMemHandle CarveDmemArea(PAddr addr, u64 size);
+    FMemHandle CarveFmemArea(PAddr addr, u64 size);
+
     VMAHandle Split(VMAHandle vma_handle, u64 offset_in_vma);
     DMemHandle Split(DMemHandle dmem_handle, u64 offset_in_area);
+    FMemHandle Split(FMemHandle fmem_handle, u64 offset_in_area);
+
     u64 UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma_base, u64 size);
     s32 UnmapMemoryImpl(VAddr virtual_addr, u64 size);
@@ -275,6 +314,7 @@ private:
 private:
     AddressSpace impl;
     DMemMap dmem_map;
+    FMemMap fmem_map;
     VMAMap vma_map;
     std::mutex mutex;
     u64 total_direct_size{};
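
The fmem_map bookkeeping added above (and the extended dmem_map handling in PoolCommit/PoolDecommit) follows a first-fit carve/merge pattern over a std::map of areas keyed by physical base address. The sketch below is a minimal, self-contained illustration of that pattern only, under simplified assumptions: the names Area, AreaMap, CarveArea, MergeAdjacent, AllocateFirstFit and FreeRange are hypothetical stand-ins rather than the MemoryManager API, and the single is_free flag omits the is_pooled/is_committed state that DirectMemoryArea tracks.

#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>

struct Area {
    uint64_t base = 0;
    uint64_t size = 0;
    bool is_free = true;
    uint64_t End() const {
        return base + size;
    }
};

// Areas are keyed by base address and always tile the managed range exactly.
using AreaMap = std::map<uint64_t, Area>;

// Split the area containing [addr, addr + size) so that a node covering exactly
// that range exists, and return an iterator to it. Assumes the map covers addr.
AreaMap::iterator CarveArea(AreaMap& map, uint64_t addr, uint64_t size) {
    auto it = std::prev(map.upper_bound(addr));
    Area& area = it->second;
    assert(addr >= area.base && addr + size <= area.End());
    if (addr + size != area.End()) {
        // Split off the tail that remains after the carved range.
        Area tail = area;
        tail.base = addr + size;
        tail.size = area.End() - tail.base;
        area.size -= tail.size;
        map.emplace(tail.base, tail);
    }
    if (addr != area.base) {
        // Split off the head and move to the newly created node for the carved range.
        Area piece = area;
        piece.base = addr;
        piece.size = area.size - (addr - area.base);
        area.size = addr - area.base;
        it = map.emplace(piece.base, piece).first;
    }
    return it;
}

// Coalesce a node with neighbours in the same state so the map stays small.
void MergeAdjacent(AreaMap& map, AreaMap::iterator it) {
    auto next = std::next(it);
    if (next != map.end() && next->second.is_free == it->second.is_free) {
        it->second.size += next->second.size;
        map.erase(next);
    }
    if (it != map.begin()) {
        auto prev = std::prev(it);
        if (prev->second.is_free == it->second.is_free) {
            prev->second.size += it->second.size;
            map.erase(it); // invalidates 'it'
        }
    }
}

// First-fit allocation: take the first free area large enough, carve the request
// out of its start, mark it used, and merge. Returns the allocated base on success.
std::optional<uint64_t> AllocateFirstFit(AreaMap& map, uint64_t size) {
    for (auto it = map.begin(); it != map.end(); ++it) {
        if (it->second.is_free && it->second.size >= size) {
            auto node = CarveArea(map, it->second.base, size);
            node->second.is_free = false;
            const uint64_t base = node->second.base; // capture before the node may be merged away
            MergeAdjacent(map, node);
            return base;
        }
    }
    return std::nullopt; // no single free area is large enough (exhaustion or fragmentation)
}

// Freeing is the inverse: carve the exact range back out, mark it free, and merge.
void FreeRange(AreaMap& map, uint64_t base, uint64_t size) {
    auto node = CarveArea(map, base, size);
    node->second.is_free = true;
    MergeAdjacent(map, node);
}

Seeding mirrors what SetupMemoryRegions does for fmem_map: a single free area covering the whole managed physical range, for example map.emplace(0, Area{0, managed_size, true}). Two details of the sketch track the patch: the carved node's base is captured before MergeAdjacent, since coalescing can erase the node (MapMemory and PoolCommit likewise read the new area's base before merging), and freeing is the exact inverse of allocation (carve the range, flip its state, merge), which is what UnmapBytesFromEntry and PoolDecommit do when they return physical ranges to fmem_map and dmem_map.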