diff --git a/CMakeLists.txt b/CMakeLists.txt index 70609142f..f09e3a1ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -679,6 +679,7 @@ set(COMMON src/common/logging/backend.cpp src/common/enum.h src/common/io_file.cpp src/common/io_file.h + src/common/lru_cache.h src/common/error.cpp src/common/error.h src/common/scope_exit.h diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..4204fad06 --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/types.h" + +namespace Common { + +template +class LeastRecentlyUsedCache { + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = Build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + Attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + Detach(item); + Attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + Detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast(tick) - static_cast(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t Build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + auto& item = item_pool.emplace_back(); + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + + void Attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void Detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque item_pool; + std::deque free_items; + Item* first_item{}; + Item* last_item{}; +}; + +} // namespace Common diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 3e66fba6a..bcb869286 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -136,7 +136,7 @@ void Liverpool::Process(std::stop_token stoken) { if (submit_done) { VideoCore::EndCapture(); if (rasterizer) { - rasterizer->EndCommandList(); + rasterizer->OnSubmit(); rasterizer->Flush(); } submit_done = false; diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index a7a0ce84f..b02f8c181 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -112,6 +112,14 @@ public: return size_bytes; } + void SetLRUId(u64 id) noexcept { + lru_id = id; + } + + u64 LRUId() const noexcept { + return lru_id; + } + vk::Buffer Handle() const noexcept { return buffer; } @@ -151,6 +159,7 @@ public: bool is_deleted{}; int stream_score = 0; size_t size_bytes = 0; + u64 lru_id = 0; std::span mapped_data; const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c5e5d18f8..8cbeae87a 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -130,6 +130,26 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s "Fault Buffer Parser Pipeline"); instance.GetDevice().destroyShaderModule(module); + + // Set up garbage collection parameters + if (!instance.CanReportMemoryUsage()) { + trigger_gc_memory = DEFAULT_TRIGGER_GC_MEMORY; + critical_gc_memory = DEFAULT_CRITICAL_GC_MEMORY; + return; + } + + const s64 device_local_memory = static_cast(instance.GetTotalMemoryBudget()); + const s64 min_spacing_expected = device_local_memory - 1_GB; + const s64 min_spacing_critical = device_local_memory - 512_MB; + const s64 mem_threshold = std::min(device_local_memory, TARGET_GC_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (2 * mem_threshold) / 10; + trigger_gc_memory = static_cast( + std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_TRIGGER_GC_MEMORY)); + critical_gc_memory = static_cast( + std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_GC_MEMORY)); } BufferCache::~BufferCache() = default; @@ -145,10 +165,11 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size, is_write); + DownloadBufferMemory(buffer, device_addr, size, is_write); }); } +template void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; @@ -183,17 +204,24 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - scheduler.Finish(); - auto* memory = Core::Memory::Instance(); - for (const auto& copy : copies) { - const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; - const u64 dst_offset = copy.dstOffset - offset; - memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, - copy.size); - } - memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); - if (is_write) { - memory_tracker->MarkRegionAsCpuModified(device_addr, size); + const auto write_data = [&]() { + auto* memory = Core::Memory::Instance(); + for (const auto& copy : copies) { + const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; + const u64 dst_offset = copy.dstOffset - offset; + memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, + copy.size); + } + memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); + if (is_write) { + memory_tracker->MarkRegionAsCpuModified(device_addr, size); + } + }; + if constexpr (async) { + scheduler.DeferOperation(write_data); + } else { + scheduler.Finish(); + write_data(); } } @@ -647,16 +675,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size); }(); auto& new_buffer = slot_buffers[new_buffer_id]; - boost::container::small_vector bda_addrs; - const u64 start_page = overlap.begin >> CACHING_PAGEBITS; - const u64 size_pages = size >> CACHING_PAGEBITS; - bda_addrs.reserve(size_pages); - for (u64 i = 0; i < size_pages; ++i) { - vk::DeviceAddress addr = new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS); - bda_addrs.push_back(addr); - } - WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(), - bda_addrs.size() * sizeof(vk::DeviceAddress)); const size_t size_bytes = new_buffer.SizeBytes(); const auto cmdbuf = scheduler.CommandBuffer(); scheduler.EndRendering(); @@ -807,6 +825,7 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { const VAddr device_addr_end = device_addr_begin + size; const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); + const u64 size_pages = page_end - page_begin; for (u64 page = page_begin; page != page_end; ++page) { if constexpr (insert) { page_table[page].buffer_id = buffer_id; @@ -815,8 +834,22 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { } } if constexpr (insert) { + total_used_memory += Common::AlignUp(size, CACHING_PAGESIZE); + buffer.SetLRUId(lru_cache.Insert(buffer_id, gc_tick)); + boost::container::small_vector bda_addrs; + bda_addrs.reserve(size_pages); + for (u64 i = 0; i < size_pages; ++i) { + vk::DeviceAddress addr = buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS); + bda_addrs.push_back(addr); + } + WriteDataBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress), + bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress)); buffer_ranges.Add(buffer.CpuAddr(), buffer.SizeBytes(), buffer_id); } else { + total_used_memory -= Common::AlignUp(size, CACHING_PAGESIZE); + lru_cache.Free(buffer.LRUId()); + FillBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress), + size_pages * sizeof(vk::DeviceAddress), 0); buffer_ranges.Subtract(buffer.CpuAddr(), buffer.SizeBytes()); } } @@ -874,6 +907,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &post_barrier, }); + TouchBuffer(buffer); } vk::Buffer BufferCache::UploadCopies(Buffer& buffer, std::span copies, @@ -1154,6 +1188,70 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val }); } +void BufferCache::FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value) { + scheduler.EndRendering(); + ASSERT_MSG(num_bytes % 4 == 0, "FillBuffer size must be a multiple of 4 bytes"); + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = buffer.Offset(address), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer.Handle(), + .offset = buffer.Offset(address), + .size = num_bytes, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + }); + cmdbuf.fillBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); +} + +void BufferCache::RunGarbageCollector() { + SCOPE_EXIT { + ++gc_tick; + }; + if (instance.CanReportMemoryUsage()) { + total_used_memory = instance.GetDeviceMemoryUsage(); + } + if (total_used_memory < trigger_gc_memory) { + return; + } + const bool aggressive = total_used_memory >= critical_gc_memory; + const u64 ticks_to_destroy = std::min(aggressive ? 80 : 160, gc_tick); + int max_deletions = aggressive ? 64 : 32; + const auto clean_up = [&](BufferId buffer_id) { + if (max_deletions == 0) { + return; + } + --max_deletions; + Buffer& buffer = slot_buffers[buffer_id]; + // InvalidateMemory(buffer.CpuAddr(), buffer.SizeBytes()); + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); + DeleteBuffer(buffer_id); + }; +} + +void BufferCache::TouchBuffer(const Buffer& buffer) { + lru_cache.Touch(buffer.LRUId(), gc_tick); +} + void BufferCache::DeleteBuffer(BufferId buffer_id) { Buffer& buffer = slot_buffers[buffer_id]; Unregister(buffer_id); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b509ce2d0..2aa67ee42 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -5,7 +5,7 @@ #include #include -#include "common/div_ceil.h" +#include "common/lru_cache.h" #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" @@ -44,6 +44,11 @@ public: static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress); static constexpr u64 FAULT_BUFFER_SIZE = CACHING_NUMPAGES / 8; // Bit per page + // Default values for garbage collection + static constexpr s64 DEFAULT_TRIGGER_GC_MEMORY = 1_GB; + static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 2_GB; + static constexpr s64 TARGET_GC_THRESHOLD = 8_GB; + struct PageData { BufferId buffer_id{}; }; @@ -162,6 +167,9 @@ public: /// Record memory barrier. Used for buffers when accessed via BDA. void MemoryBarrier(); + /// Runs the garbage collector. + void RunGarbageCollector(); + private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { @@ -176,6 +184,7 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } + template void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); @@ -203,6 +212,10 @@ private: void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); + void FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value); + + void TouchBuffer(const Buffer& buffer); + void DeleteBuffer(BufferId buffer_id); const Vulkan::Instance& instance; @@ -220,6 +233,11 @@ private: Buffer fault_buffer; std::shared_mutex slot_buffers_mutex; Common::SlotVector slot_buffers; + u64 total_used_memory = 0; + u64 trigger_gc_memory = 0; + u64 critical_gc_memory = 0; + u64 gc_tick = 0; + Common::LeastRecentlyUsedCache lru_cache; RangeSet gpu_modified_ranges; SplitRangeMap buffer_ranges; PageTable page_table; diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 63297bfdc..daa1218cc 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" #include "common/debug.h" +#include "common/div_ceil.h" #include "common/range_lock.h" #include "common/signal_context.h" #include "core/memory.h" diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 6e5351ddd..03c13a4cb 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/types.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -155,6 +156,7 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index, VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion)); CreateDevice(); + CollectPhysicalMemoryInfo(); CollectToolingInfo(); // Check and log format support details. @@ -319,6 +321,8 @@ bool Instance::CreateDevice() { } #endif + supports_memory_budget = add_extension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); + const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { LOG_CRITICAL(Render_Vulkan, "Physical device reported no queues."); @@ -617,11 +621,55 @@ void Instance::CollectDeviceParameters() { LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name); LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name); + LOG_INFO(Render_Vulkan, "GPU_Integrated: {}", IsIntegrated() ? "Yes" : "No"); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions); } +void Instance::CollectPhysicalMemoryInfo() { + vk::PhysicalDeviceMemoryBudgetPropertiesEXT budget{}; + vk::PhysicalDeviceMemoryProperties2 props = { + .pNext = supports_memory_budget ? &budget : nullptr, + }; + physical_device.getMemoryProperties2(&props); + const auto& memory_props = props.memoryProperties; + const size_t num_props = memory_props.memoryHeapCount; + total_memory_budget = 0; + u64 device_initial_usage = 0; + u64 local_memory = 0; + for (size_t i = 0; i < num_props; ++i) { + const bool is_device_local = + (memory_props.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) != + vk::MemoryHeapFlags{}; + if (!IsIntegrated() && !is_device_local) { + // Ignore non-device local memory on discrete GPUs. + continue; + } + valid_heaps.push_back(i); + if (is_device_local) { + local_memory += memory_props.memoryHeaps[i].size; + } + if (supports_memory_budget) { + device_initial_usage += budget.heapUsage[i]; + total_memory_budget += budget.heapBudget[i]; + continue; + } + // If memory budget is not supported, use the size of the heap as the budget. + total_memory_budget += memory_props.memoryHeaps[i].size; + } + if (!IsIntegrated()) { + // We reserve some memory for the system. + const u64 system_memory = std::min(total_memory_budget / 8, 1_GB); + total_memory_budget -= system_memory; + return; + } + // Leave at least 8 GB for the system on integrated GPUs. + const s64 available_memory = static_cast(total_memory_budget - device_initial_usage); + total_memory_budget = + static_cast(std::max(available_memory - 8_GB, static_cast(local_memory))); +} + void Instance::CollectToolingInfo() const { if (driver_id == vk::DriverId::eAmdProprietary || driver_id == vk::DriverId::eIntelProprietaryWindows) { @@ -641,6 +689,20 @@ void Instance::CollectToolingInfo() const { } } +u64 Instance::GetDeviceMemoryUsage() const { + vk::PhysicalDeviceMemoryBudgetPropertiesEXT memory_budget_props{}; + vk::PhysicalDeviceMemoryProperties2 props = { + .pNext = &memory_budget_props, + }; + physical_device.getMemoryProperties2(&props); + + u64 total_usage = 0; + for (const size_t heap : valid_heaps) { + total_usage += memory_budget_props.heapUsage[heap]; + } + return total_usage; +} + vk::FormatFeatureFlags2 Instance::GetFormatFeatureFlags(vk::Format format) const { const auto it = format_properties.find(format); if (it == format_properties.end()) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index b21e00a71..c34c12589 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -284,6 +284,11 @@ public: return properties.deviceName; } + /// Returns if the device is an integrated GPU. + bool IsIntegrated() const { + return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; + } + /// Returns the pipeline cache unique identifier const auto GetPipelineCacheUUID() const { return properties.pipelineCacheUUID; @@ -386,6 +391,19 @@ public: return features.logicOp; } + /// Returns whether the device can report memory usage. + bool CanReportMemoryUsage() const { + return supports_memory_budget; + } + + /// Returns the amount of memory used. + [[nodiscard]] u64 GetDeviceMemoryUsage() const; + + /// Returns the total memory budget available to the device. + [[nodiscard]] u64 GetTotalMemoryBudget() const { + return total_memory_budget; + } + /// Determines if a format is supported for a set of feature flags. [[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const; @@ -396,8 +414,9 @@ private: /// Creates the VMA allocator handle void CreateAllocator(); - /// Collects telemetry information from the device. + /// Collects various information from the device. void CollectDeviceParameters(); + void CollectPhysicalMemoryInfo(); void CollectToolingInfo() const; /// Gets the supported feature flags for a format. @@ -450,6 +469,9 @@ private: bool shader_atomic_float2{}; bool workgroup_memory_explicit_layout{}; bool portability_subset{}; + bool supports_memory_budget{}; + u64 total_memory_budget{}; + std::vector valid_heaps; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c3e221739..fa84a6b42 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -446,12 +446,14 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::EndCommandList() { +void Rasterizer::OnSubmit() { if (fault_process_pending) { fault_process_pending = false; buffer_cache.ProcessFaultBuffer(); } texture_cache.ProcessDownloadImages(); + texture_cache.RunGarbageCollector(); + buffer_cache.RunGarbageCollector(); } bool Rasterizer::BindResources(const Pipeline* pipeline) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 79e7722b8..a1d59021b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -68,7 +68,7 @@ public: void CpSync(); u64 Flush(); void Finish(); - void EndCommandList(); + void OnSubmit(); PipelineCache& GetPipelineCache() { return pipeline_cache; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 2dbaff053..b791b548b 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -111,6 +111,11 @@ struct Image { return track_addr != 0 && track_addr_end != 0; } + bool SafeToDownload() const { + return True(flags & ImageFlagBits::GpuModified) && + False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty)); + } + const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; ImageInfo info; @@ -122,6 +127,7 @@ struct Image { std::vector image_view_infos; std::vector image_view_ids; ImageId depth_id{}; + u64 lru_id{}; // Resource state tracking struct { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 024d2e785..fa2029b8f 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/config.h" #include "common/debug.h" +#include "common/scope_exit.h" #include "core/memory.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" @@ -28,6 +29,28 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& // Create basic null image at fixed image ID. const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); ASSERT(null_id.index == NULL_IMAGE_ID.index); + + // Set up garbage collection parameters. + if (!instance.CanReportMemoryUsage()) { + trigger_gc_memory = 0; + pressure_gc_memory = DEFAULT_PRESSURE_GC_MEMORY; + critical_gc_memory = DEFAULT_CRITICAL_GC_MEMORY; + return; + } + + const s64 device_local_memory = static_cast(instance.GetTotalMemoryBudget()); + const s64 min_spacing_expected = device_local_memory - 1_GB; + const s64 min_spacing_critical = device_local_memory - 512_MB; + const s64 mem_threshold = std::min(device_local_memory, TARGET_GC_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (2 * mem_threshold) / 10; + pressure_gc_memory = static_cast( + std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_PRESSURE_GC_MEMORY)); + critical_gc_memory = static_cast( + std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_GC_MEMORY)); + trigger_gc_memory = static_cast((device_local_memory - mem_threshold) / 2); } TextureCache::~TextureCache() = default; @@ -459,6 +482,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { Image& image = slot_images[image_id]; image.tick_accessed_last = scheduler.CurrentTick(); + TouchImage(image); // If the image requested is a subresource of the image from cache record its location. if (view_mip > 0) { @@ -557,6 +581,7 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { RegisterImage(stencil_id); } Image& image = slot_images[stencil_id]; + TouchImage(image); image.AssociateDepth(image_id); } @@ -719,6 +744,8 @@ void TextureCache::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; + total_used_memory += Common::AlignUp(image.info.guest_size, 1024); + image.lru_id = lru_cache.Insert(image_id, gc_tick); ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); } @@ -728,6 +755,8 @@ void TextureCache::UnregisterImage(ImageId image_id) { ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), "Trying to unregister an already unregistered image"); image.flags &= ~ImageFlagBits::Registered; + lru_cache.Free(image.lru_id); + total_used_memory -= Common::AlignUp(image.info.guest_size, 1024); ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == nullptr) { @@ -852,6 +881,77 @@ void TextureCache::UntrackImageTail(ImageId image_id) { tracker.UpdatePageWatchers(addr, size); } +void TextureCache::RunGarbageCollector() { + SCOPE_EXIT { + ++gc_tick; + }; + if (instance.CanReportMemoryUsage()) { + total_used_memory = instance.GetDeviceMemoryUsage(); + } + if (total_used_memory < trigger_gc_memory) { + return; + } + std::scoped_lock lock{mutex}; + bool pressured = false; + bool aggresive = false; + u64 ticks_to_destroy = 0; + size_t num_deletions = 0; + + const auto configure = [&](bool allow_aggressive) { + pressured = total_used_memory >= pressure_gc_memory; + aggresive = allow_aggressive && total_used_memory >= critical_gc_memory; + ticks_to_destroy = aggresive ? 160 : pressured ? 80 : 16; + ticks_to_destroy = std::min(ticks_to_destroy, gc_tick); + num_deletions = aggresive ? 40 : pressured ? 20 : 10; + }; + const auto clean_up = [&](ImageId image_id) { + if (num_deletions == 0) { + return true; + } + --num_deletions; + auto& image = slot_images[image_id]; + const bool download = image.SafeToDownload(); + const bool linear = image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear; + if (!linear && download) { + // This is a workaround for now. We can't handle non-linear image downloads. + return false; + } + if (download && !pressured) { + return false; + } + if (download) { + DownloadImageMemory(image_id); + } + FreeImage(image_id); + if (total_used_memory < critical_gc_memory) { + if (aggresive) { + num_deletions >>= 2; + aggresive = false; + return false; + } + if (pressured && total_used_memory < pressure_gc_memory) { + num_deletions >>= 1; + pressured = false; + } + } + return false; + }; + + // Try to remove anything old enough and not high priority. + configure(false); + lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); + + if (total_used_memory >= critical_gc_memory) { + // If we are still over the critical limit, run an aggressive GC + configure(true); + lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); + } +} + +void TextureCache::TouchImage(const Image& image) { + lru_cache.Touch(image.lru_id, gc_tick); +} + void TextureCache::DeleteImage(ImageId image_id) { Image& image = slot_images[image_id]; ASSERT_MSG(!image.IsTracked(), "Image was not untracked"); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9a9679c0a..c4f09f6a0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include #include +#include "common/lru_cache.h" #include "common/slot_vector.h" #include "video_core/amdgpu/resource.h" #include "video_core/multi_level_page_table.h" @@ -37,6 +38,11 @@ DECLARE_ENUM_FLAG_OPERATORS(FindFlags) static constexpr u32 MaxInvalidateDist = 12_MB; class TextureCache { + // Default values for garbage collection + static constexpr s64 DEFAULT_PRESSURE_GC_MEMORY = 1_GB + 512_MB; + static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB; + static constexpr s64 TARGET_GC_THRESHOLD = 8_GB; + struct Traits { using Entry = boost::container::small_vector; static constexpr size_t AddressSpaceBits = 40; @@ -126,6 +132,7 @@ public: std::scoped_lock lock{mutex}; Image& image = slot_images[image_id]; TrackImage(image_id); + TouchImage(image); RefreshImage(image, custom_scheduler); } @@ -150,12 +157,18 @@ public: /// Retrieves the image with the specified id. [[nodiscard]] Image& GetImage(ImageId id) { - return slot_images[id]; + auto& image = slot_images[id]; + TouchImage(image); + return image; } /// Retrieves the image view with the specified id. [[nodiscard]] ImageView& GetImageView(ImageId id) { - return slot_image_views[id]; + auto& view = slot_image_views[id]; + // Maybe this is not needed. + Image& image = slot_images[view.image_id]; + TouchImage(image); + return view; } /// Registers an image view for provided image @@ -199,6 +212,9 @@ public: return false; } + /// Runs the garbage collector. + void RunGarbageCollector(); + template void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { using FuncReturn = typename std::invoke_result::type; @@ -287,6 +303,9 @@ private: /// Removes the image and any views/surface metas that reference it. void DeleteImage(ImageId image_id); + /// Touch the image in the LRU cache. + void TouchImage(const Image& image); + void FreeImage(ImageId image_id) { UntrackImage(image_id); UnregisterImage(image_id); @@ -305,6 +324,12 @@ private: tsl::robin_map samplers; tsl::robin_map null_images; std::unordered_set download_images; + u64 total_used_memory = 0; + u64 trigger_gc_memory = 0; + u64 pressure_gc_memory = 0; + u64 critical_gc_memory = 0; + u64 gc_tick = 0; + Common::LeastRecentlyUsedCache lru_cache; PageTable page_table; std::mutex mutex;