mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-12-09 05:08:43 +00:00)
video_core: garbage collector (part 1) (#3350)
* Memory information
* Buffer cache GC
* Texture cache GC
* Fix ChangeRegister
* Better image touching
* Buffer async download on GC destroy
* Handle image download, SKIP NON-LINEAR WORKAROUND
* Only download when not dirty
* Correctly handle BDA pagefile update
* Restructure ChangeRegistration
committed by GitHub
parent 2f701311f2
commit 841aa9e43d
@@ -679,6 +679,7 @@ set(COMMON src/common/logging/backend.cpp
     src/common/enum.h
     src/common/io_file.cpp
     src/common/io_file.h
+    src/common/lru_cache.h
     src/common/error.cpp
     src/common/error.h
     src/common/scope_exit.h
src/common/lru_cache.h (new file, 135 lines)
@@ -0,0 +1,135 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <deque>
#include <type_traits>

#include "common/types.h"

namespace Common {

template <typename ObjectType, typename TickType>
class LeastRecentlyUsedCache {
    struct Item {
        ObjectType obj;
        TickType tick;
        Item* next{};
        Item* prev{};
    };

public:
    LeastRecentlyUsedCache() : first_item{}, last_item{} {}
    ~LeastRecentlyUsedCache() = default;

    size_t Insert(ObjectType obj, TickType tick) {
        const auto new_id = Build();
        auto& item = item_pool[new_id];
        item.obj = obj;
        item.tick = tick;
        Attach(item);
        return new_id;
    }

    void Touch(size_t id, TickType tick) {
        auto& item = item_pool[id];
        if (item.tick >= tick) {
            return;
        }
        item.tick = tick;
        if (&item == last_item) {
            return;
        }
        Detach(item);
        Attach(item);
    }

    void Free(size_t id) {
        auto& item = item_pool[id];
        Detach(item);
        item.prev = nullptr;
        item.next = nullptr;
        free_items.push_back(id);
    }

    template <typename Func>
    void ForEachItemBelow(TickType tick, Func&& func) {
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func, ObjectType>, bool>;
        Item* iterator = first_item;
        while (iterator) {
            if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) {
                return;
            }
            Item* next = iterator->next;
            if constexpr (RETURNS_BOOL) {
                if (func(iterator->obj)) {
                    return;
                }
            } else {
                func(iterator->obj);
            }
            iterator = next;
        }
    }

private:
    size_t Build() {
        if (free_items.empty()) {
            const size_t item_id = item_pool.size();
            auto& item = item_pool.emplace_back();
            item.next = nullptr;
            item.prev = nullptr;
            return item_id;
        }
        const size_t item_id = free_items.front();
        free_items.pop_front();
        auto& item = item_pool[item_id];
        item.next = nullptr;
        item.prev = nullptr;
        return item_id;
    }

    void Attach(Item& item) {
        if (!first_item) {
            first_item = &item;
        }
        if (!last_item) {
            last_item = &item;
        } else {
            item.prev = last_item;
            last_item->next = &item;
            item.next = nullptr;
            last_item = &item;
        }
    }

    void Detach(Item& item) {
        if (item.prev) {
            item.prev->next = item.next;
        }
        if (item.next) {
            item.next->prev = item.prev;
        }
        if (&item == first_item) {
            first_item = item.next;
            if (first_item) {
                first_item->prev = nullptr;
            }
        }
        if (&item == last_item) {
            last_item = item.prev;
            if (last_item) {
                last_item->next = nullptr;
            }
        }
    }

    std::deque<Item> item_pool;
    std::deque<size_t> free_items;
    Item* first_item{};
    Item* last_item{};
};

} // namespace Common
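For orientation, a minimal usage sketch of the new container (not part of the commit; it assumes the header above is on the include path together with the repo's u64 alias from common/types.h):

#include <cstdio>
#include "common/lru_cache.h"
#include "common/types.h"

int main() {
    Common::LeastRecentlyUsedCache<int, u64> lru;
    u64 tick = 0;
    const size_t a = lru.Insert(1, tick);   // object 1, tick 0 (oldest)
    const size_t b = lru.Insert(2, ++tick); // object 2, tick 1
    const size_t c = lru.Insert(3, ++tick); // object 3, tick 2
    tick = 20;
    lru.Touch(b, tick); // object 2 becomes the most recently used
    // Visits objects whose tick is <= 10, oldest first: prints 1 then 3.
    lru.ForEachItemBelow(tick - 10, [](int obj) { std::printf("%d\n", obj); });
    lru.Free(a);
    lru.Free(b);
    lru.Free(c);
}

This is the same Insert/Touch/Free/ForEachItemBelow protocol the buffer and texture caches adopt below, with BufferId or ImageId as the object type and their gc_tick counter as the tick.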
@@ -136,7 +136,7 @@ void Liverpool::Process(std::stop_token stoken) {
         if (submit_done) {
             VideoCore::EndCapture();
             if (rasterizer) {
-                rasterizer->EndCommandList();
+                rasterizer->OnSubmit();
                 rasterizer->Flush();
             }
             submit_done = false;
@@ -112,6 +112,14 @@ public:
        return size_bytes;
    }

    void SetLRUId(u64 id) noexcept {
        lru_id = id;
    }

    u64 LRUId() const noexcept {
        return lru_id;
    }

    vk::Buffer Handle() const noexcept {
        return buffer;
    }
@@ -151,6 +159,7 @@ public:
    bool is_deleted{};
    int stream_score = 0;
    size_t size_bytes = 0;
    u64 lru_id = 0;
    std::span<u8> mapped_data;
    const Vulkan::Instance* instance;
    Vulkan::Scheduler* scheduler;
@@ -130,6 +130,26 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
                                              "Fault Buffer Parser Pipeline");

    instance.GetDevice().destroyShaderModule(module);

    // Set up garbage collection parameters
    if (!instance.CanReportMemoryUsage()) {
        trigger_gc_memory = DEFAULT_TRIGGER_GC_MEMORY;
        critical_gc_memory = DEFAULT_CRITICAL_GC_MEMORY;
        return;
    }

    const s64 device_local_memory = static_cast<s64>(instance.GetTotalMemoryBudget());
    const s64 min_spacing_expected = device_local_memory - 1_GB;
    const s64 min_spacing_critical = device_local_memory - 512_MB;
    const s64 mem_threshold = std::min<s64>(device_local_memory, TARGET_GC_THRESHOLD);
    const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
    const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
    trigger_gc_memory = static_cast<u64>(
        std::max<u64>(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
                      DEFAULT_TRIGGER_GC_MEMORY));
    critical_gc_memory = static_cast<u64>(
        std::max<u64>(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
                      DEFAULT_CRITICAL_GC_MEMORY));
}
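To make these thresholds concrete, a worked example under an assumed 8_GB budget (illustrative numbers, not from the commit):

// device_local_memory  = 8_GB (assumed)
// mem_threshold        = min(8_GB, TARGET_GC_THRESHOLD)  = 8_GB
// min_vacancy_expected = (6 * 8_GB) / 10                 = 4.8_GB
// min_vacancy_critical = (2 * 8_GB) / 10                 = 1.6_GB
// trigger_gc_memory    = max(min(3.2_GB, 7_GB), 1_GB)    = 3.2_GB
// critical_gc_memory   = max(min(6.4_GB, 7.5_GB), 2_GB)  = 6.4_GB
// With this budget, collection starts at 3.2_GB of usage and the aggressive
// path engages at 6.4_GB; the min_spacing terms only bite on small budgets.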

BufferCache::~BufferCache() = default;
@@ -145,10 +165,11 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
 void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) {
     liverpool->SendCommand<true>([this, device_addr, size, is_write] {
         Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
-        DownloadBufferMemory(buffer, device_addr, size, is_write);
+        DownloadBufferMemory<false>(buffer, device_addr, size, is_write);
     });
 }

+template <bool async>
 void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) {
     boost::container::small_vector<vk::BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
@@ -183,17 +204,24 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies);
-    scheduler.Finish();
-    auto* memory = Core::Memory::Instance();
-    for (const auto& copy : copies) {
-        const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
-        const u64 dst_offset = copy.dstOffset - offset;
-        memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), download + dst_offset,
-                                copy.size);
-    }
-    memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
-    if (is_write) {
-        memory_tracker->MarkRegionAsCpuModified(device_addr, size);
-    }
+    const auto write_data = [&]() {
+        auto* memory = Core::Memory::Instance();
+        for (const auto& copy : copies) {
+            const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
+            const u64 dst_offset = copy.dstOffset - offset;
+            memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), download + dst_offset,
+                                    copy.size);
+        }
+        memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
+        if (is_write) {
+            memory_tracker->MarkRegionAsCpuModified(device_addr, size);
+        }
+    };
+    if constexpr (async) {
+        scheduler.DeferOperation(write_data);
+    } else {
+        scheduler.Finish();
+        write_data();
+    }
 }
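The async path relies on the scheduler invoking write_data only once the GPU has finished the copy, instead of stalling the submit thread with Finish(). A minimal sketch of that deferral pattern (hypothetical DeferredQueue type for illustration; the project's Scheduler::DeferOperation is assumed to keep equivalent tick bookkeeping internally):

#include <cstdint>
#include <deque>
#include <functional>
#include <utility>

class DeferredQueue {
public:
    // Register a callback against the tick of the work just recorded.
    void DeferOperation(std::function<void()> callback) {
        pending.emplace_back(current_tick, std::move(callback));
    }

    // Called when the GPU timeline advances; runs every callback whose
    // submission tick has completed, in FIFO order.
    void OnTickCompleted(std::uint64_t completed_tick) {
        while (!pending.empty() && pending.front().first <= completed_tick) {
            pending.front().second();
            pending.pop_front();
        }
    }

    std::uint64_t current_tick = 0;

private:
    std::deque<std::pair<std::uint64_t, std::function<void()>>> pending;
};

On the synchronous path the same callback simply runs right after scheduler.Finish(), which blocks until the copy results are observable.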
@@ -647,16 +675,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
             AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
     }();
     auto& new_buffer = slot_buffers[new_buffer_id];
-    boost::container::small_vector<vk::DeviceAddress, 128> bda_addrs;
-    const u64 start_page = overlap.begin >> CACHING_PAGEBITS;
-    const u64 size_pages = size >> CACHING_PAGEBITS;
-    bda_addrs.reserve(size_pages);
-    for (u64 i = 0; i < size_pages; ++i) {
-        vk::DeviceAddress addr = new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS);
-        bda_addrs.push_back(addr);
-    }
-    WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
-                    bda_addrs.size() * sizeof(vk::DeviceAddress));
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
     scheduler.EndRendering();
@@ -807,6 +825,7 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
     const VAddr device_addr_end = device_addr_begin + size;
     const u64 page_begin = device_addr_begin / CACHING_PAGESIZE;
     const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE);
+    const u64 size_pages = page_end - page_begin;
     for (u64 page = page_begin; page != page_end; ++page) {
         if constexpr (insert) {
             page_table[page].buffer_id = buffer_id;
@@ -815,8 +834,22 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
         }
     }
     if constexpr (insert) {
+        total_used_memory += Common::AlignUp(size, CACHING_PAGESIZE);
+        buffer.SetLRUId(lru_cache.Insert(buffer_id, gc_tick));
+        boost::container::small_vector<vk::DeviceAddress, 128> bda_addrs;
+        bda_addrs.reserve(size_pages);
+        for (u64 i = 0; i < size_pages; ++i) {
+            vk::DeviceAddress addr = buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS);
+            bda_addrs.push_back(addr);
+        }
+        WriteDataBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress),
+                        bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress));
         buffer_ranges.Add(buffer.CpuAddr(), buffer.SizeBytes(), buffer_id);
     } else {
+        total_used_memory -= Common::AlignUp(size, CACHING_PAGESIZE);
+        lru_cache.Free(buffer.LRUId());
+        FillBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress),
+                   size_pages * sizeof(vk::DeviceAddress), 0);
         buffer_ranges.Subtract(buffer.CpuAddr(), buffer.SizeBytes());
     }
 }
@@ -874,6 +907,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
         .bufferMemoryBarrierCount = 1,
         .pBufferMemoryBarriers = &post_barrier,
     });
+    TouchBuffer(buffer);
 }

 vk::Buffer BufferCache::UploadCopies(Buffer& buffer, std::span<vk::BufferCopy> copies,
@@ -1154,6 +1188,70 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
    });
}

void BufferCache::FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value) {
    scheduler.EndRendering();
    ASSERT_MSG(num_bytes % 4 == 0, "FillBuffer size must be a multiple of 4 bytes");
    const auto cmdbuf = scheduler.CommandBuffer();
    const vk::BufferMemoryBarrier2 pre_barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .buffer = buffer.Handle(),
        .offset = buffer.Offset(address),
        .size = num_bytes,
    };
    const vk::BufferMemoryBarrier2 post_barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
        .buffer = buffer.Handle(),
        .offset = buffer.Offset(address),
        .size = num_bytes,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &pre_barrier,
    });
    cmdbuf.fillBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &post_barrier,
    });
}
void BufferCache::RunGarbageCollector() {
    SCOPE_EXIT {
        ++gc_tick;
    };
    if (instance.CanReportMemoryUsage()) {
        total_used_memory = instance.GetDeviceMemoryUsage();
    }
    if (total_used_memory < trigger_gc_memory) {
        return;
    }
    const bool aggressive = total_used_memory >= critical_gc_memory;
    const u64 ticks_to_destroy = std::min<u64>(aggressive ? 80 : 160, gc_tick);
    int max_deletions = aggressive ? 64 : 32;
    const auto clean_up = [&](BufferId buffer_id) {
        if (max_deletions == 0) {
            return;
        }
        --max_deletions;
        Buffer& buffer = slot_buffers[buffer_id];
        // InvalidateMemory(buffer.CpuAddr(), buffer.SizeBytes());
        DownloadBufferMemory<true>(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true);
        DeleteBuffer(buffer_id);
    };
    lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up);
}

void BufferCache::TouchBuffer(const Buffer& buffer) {
    lru_cache.Touch(buffer.LRUId(), gc_tick);
}

void BufferCache::DeleteBuffer(BufferId buffer_id) {
    Buffer& buffer = slot_buffers[buffer_id];
    Unregister(buffer_id);
@@ -5,7 +5,7 @@

#include <shared_mutex>
#include <boost/container/small_vector.hpp>
#include "common/div_ceil.h"
#include "common/lru_cache.h"
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
@@ -44,6 +44,11 @@ public:
    static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress);
    static constexpr u64 FAULT_BUFFER_SIZE = CACHING_NUMPAGES / 8; // Bit per page

    // Default values for garbage collection
    static constexpr s64 DEFAULT_TRIGGER_GC_MEMORY = 1_GB;
    static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 2_GB;
    static constexpr s64 TARGET_GC_THRESHOLD = 8_GB;

    struct PageData {
        BufferId buffer_id{};
    };
@@ -162,6 +167,9 @@ public:
    /// Record memory barrier. Used for buffers when accessed via BDA.
    void MemoryBarrier();

    /// Runs the garbage collector.
    void RunGarbageCollector();

private:
    template <typename Func>
    void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -176,6 +184,7 @@ private:
         return !buffer_id || slot_buffers[buffer_id].is_deleted;
     }

+    template <bool async>
     void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write);

     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
@@ -203,6 +212,10 @@ private:

    void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);

    void FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value);

    void TouchBuffer(const Buffer& buffer);

    void DeleteBuffer(BufferId buffer_id);

    const Vulkan::Instance& instance;
@@ -220,6 +233,11 @@ private:
    Buffer fault_buffer;
    std::shared_mutex slot_buffers_mutex;
    Common::SlotVector<Buffer> slot_buffers;
    u64 total_used_memory = 0;
    u64 trigger_gc_memory = 0;
    u64 critical_gc_memory = 0;
    u64 gc_tick = 0;
    Common::LeastRecentlyUsedCache<BufferId, u64> lru_cache;
    RangeSet gpu_modified_ranges;
    SplitRangeMap<BufferId> buffer_ranges;
    PageTable page_table;
@@ -4,6 +4,7 @@
#include <boost/container/small_vector.hpp>
#include "common/assert.h"
#include "common/debug.h"
#include "common/div_ceil.h"
#include "common/range_lock.h"
#include "common/signal_context.h"
#include "core/memory.h"
@@ -7,6 +7,7 @@

#include "common/assert.h"
#include "common/debug.h"
#include "common/types.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@@ -155,6 +156,7 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
              VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));

     CreateDevice();
+    CollectPhysicalMemoryInfo();
     CollectToolingInfo();

     // Check and log format support details.
@@ -319,6 +321,8 @@ bool Instance::CreateDevice() {
     }
 #endif

+    supports_memory_budget = add_extension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
+
     const auto family_properties = physical_device.getQueueFamilyProperties();
     if (family_properties.empty()) {
         LOG_CRITICAL(Render_Vulkan, "Physical device reported no queues.");
@@ -617,11 +621,55 @@ void Instance::CollectDeviceParameters() {

    LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
    LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
    LOG_INFO(Render_Vulkan, "GPU_Integrated: {}", IsIntegrated() ? "Yes" : "No");
    LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
    LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
    LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
}

void Instance::CollectPhysicalMemoryInfo() {
    vk::PhysicalDeviceMemoryBudgetPropertiesEXT budget{};
    vk::PhysicalDeviceMemoryProperties2 props = {
        .pNext = supports_memory_budget ? &budget : nullptr,
    };
    physical_device.getMemoryProperties2(&props);
    const auto& memory_props = props.memoryProperties;
    const size_t num_props = memory_props.memoryHeapCount;
    total_memory_budget = 0;
    u64 device_initial_usage = 0;
    u64 local_memory = 0;
    for (size_t i = 0; i < num_props; ++i) {
        const bool is_device_local =
            (memory_props.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) !=
            vk::MemoryHeapFlags{};
        if (!IsIntegrated() && !is_device_local) {
            // Ignore non-device local memory on discrete GPUs.
            continue;
        }
        valid_heaps.push_back(i);
        if (is_device_local) {
            local_memory += memory_props.memoryHeaps[i].size;
        }
        if (supports_memory_budget) {
            device_initial_usage += budget.heapUsage[i];
            total_memory_budget += budget.heapBudget[i];
            continue;
        }
        // If memory budget is not supported, use the size of the heap as the budget.
        total_memory_budget += memory_props.memoryHeaps[i].size;
    }
    if (!IsIntegrated()) {
        // We reserve some memory for the system.
        const u64 system_memory = std::min<u64>(total_memory_budget / 8, 1_GB);
        total_memory_budget -= system_memory;
        return;
    }
    // Leave at least 8 GB for the system on integrated GPUs.
    const s64 available_memory = static_cast<s64>(total_memory_budget - device_initial_usage);
    total_memory_budget =
        static_cast<u64>(std::max<s64>(available_memory - 8_GB, static_cast<s64>(local_memory)));
}
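A worked example of the discrete-GPU reservation (hypothetical hardware, not from the commit):

// Discrete GPU, one 12_GB device-local heap, VK_EXT_memory_budget present:
//   total_memory_budget = heapBudget[0]        = 12_GB
//   system_memory       = min(12_GB / 8, 1_GB) = 1_GB
//   budget after reservation                   = 11_GB
// Without the extension, the heap size stands in for heapBudget, so the
// reservation comes out of the raw 12_GB instead.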

void Instance::CollectToolingInfo() const {
    if (driver_id == vk::DriverId::eAmdProprietary ||
        driver_id == vk::DriverId::eIntelProprietaryWindows) {
@@ -641,6 +689,20 @@ void Instance::CollectToolingInfo() const {
    }
}

u64 Instance::GetDeviceMemoryUsage() const {
    vk::PhysicalDeviceMemoryBudgetPropertiesEXT memory_budget_props{};
    vk::PhysicalDeviceMemoryProperties2 props = {
        .pNext = &memory_budget_props,
    };
    physical_device.getMemoryProperties2(&props);

    u64 total_usage = 0;
    for (const size_t heap : valid_heaps) {
        total_usage += memory_budget_props.heapUsage[heap];
    }
    return total_usage;
}

vk::FormatFeatureFlags2 Instance::GetFormatFeatureFlags(vk::Format format) const {
    const auto it = format_properties.find(format);
    if (it == format_properties.end()) {
@@ -284,6 +284,11 @@ public:
        return properties.deviceName;
    }

    /// Returns if the device is an integrated GPU.
    bool IsIntegrated() const {
        return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
    }

    /// Returns the pipeline cache unique identifier
    const auto GetPipelineCacheUUID() const {
        return properties.pipelineCacheUUID;
@@ -386,6 +391,19 @@ public:
        return features.logicOp;
    }

    /// Returns whether the device can report memory usage.
    bool CanReportMemoryUsage() const {
        return supports_memory_budget;
    }

    /// Returns the amount of memory used.
    [[nodiscard]] u64 GetDeviceMemoryUsage() const;

    /// Returns the total memory budget available to the device.
    [[nodiscard]] u64 GetTotalMemoryBudget() const {
        return total_memory_budget;
    }

    /// Determines if a format is supported for a set of feature flags.
    [[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const;
@@ -396,8 +414,9 @@ private:
     /// Creates the VMA allocator handle
     void CreateAllocator();

-    /// Collects telemetry information from the device.
+    /// Collects various information from the device.
     void CollectDeviceParameters();
+    void CollectPhysicalMemoryInfo();
     void CollectToolingInfo() const;

     /// Gets the supported feature flags for a format.
@@ -450,6 +469,9 @@ private:
    bool shader_atomic_float2{};
    bool workgroup_memory_explicit_layout{};
    bool portability_subset{};
    bool supports_memory_budget{};
    u64 total_memory_budget{};
    std::vector<size_t> valid_heaps;
};

} // namespace Vulkan
@@ -446,12 +446,14 @@ void Rasterizer::Finish() {
     scheduler.Finish();
 }

-void Rasterizer::EndCommandList() {
+void Rasterizer::OnSubmit() {
     if (fault_process_pending) {
         fault_process_pending = false;
         buffer_cache.ProcessFaultBuffer();
     }
     texture_cache.ProcessDownloadImages();
+    texture_cache.RunGarbageCollector();
+    buffer_cache.RunGarbageCollector();
 }

 bool Rasterizer::BindResources(const Pipeline* pipeline) {
@@ -68,7 +68,7 @@ public:
     void CpSync();
     u64 Flush();
     void Finish();
-    void EndCommandList();
+    void OnSubmit();

     PipelineCache& GetPipelineCache() {
         return pipeline_cache;
@@ -111,6 +111,11 @@ struct Image {
        return track_addr != 0 && track_addr_end != 0;
    }

    bool SafeToDownload() const {
        return True(flags & ImageFlagBits::GpuModified) &&
               False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty));
    }

    const Vulkan::Instance* instance;
    Vulkan::Scheduler* scheduler;
    ImageInfo info;
@@ -122,6 +127,7 @@ struct Image {
    std::vector<ImageViewInfo> image_view_infos;
    std::vector<ImageViewId> image_view_ids;
    ImageId depth_id{};
    u64 lru_id{};

    // Resource state tracking
    struct {
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/scope_exit.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
@@ -28,6 +29,28 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
    // Create basic null image at fixed image ID.
    const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
    ASSERT(null_id.index == NULL_IMAGE_ID.index);

    // Set up garbage collection parameters.
    if (!instance.CanReportMemoryUsage()) {
        trigger_gc_memory = 0;
        pressure_gc_memory = DEFAULT_PRESSURE_GC_MEMORY;
        critical_gc_memory = DEFAULT_CRITICAL_GC_MEMORY;
        return;
    }

    const s64 device_local_memory = static_cast<s64>(instance.GetTotalMemoryBudget());
    const s64 min_spacing_expected = device_local_memory - 1_GB;
    const s64 min_spacing_critical = device_local_memory - 512_MB;
    const s64 mem_threshold = std::min<s64>(device_local_memory, TARGET_GC_THRESHOLD);
    const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
    const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
    pressure_gc_memory = static_cast<u64>(
        std::max<u64>(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
                      DEFAULT_PRESSURE_GC_MEMORY));
    critical_gc_memory = static_cast<u64>(
        std::max<u64>(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
                      DEFAULT_CRITICAL_GC_MEMORY));
    trigger_gc_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
}
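Unlike the buffer cache, the trigger here scales with how far the budget exceeds TARGET_GC_THRESHOLD. A few illustrative budgets (assumed values, not from the commit):

// trigger_gc_memory = (budget - min(budget, 8_GB)) / 2
//   budget =  8_GB -> trigger = 0     (the collector is evaluated every run)
//   budget = 12_GB -> trigger = 2_GB
//   budget = 24_GB -> trigger = 8_GB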

TextureCache::~TextureCache() = default;
@@ -459,6 +482,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {

     Image& image = slot_images[image_id];
     image.tick_accessed_last = scheduler.CurrentTick();
+    TouchImage(image);

     // If the image requested is a subresource of the image from cache record its location.
     if (view_mip > 0) {
@@ -557,6 +581,7 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
         RegisterImage(stencil_id);
     }
     Image& image = slot_images[stencil_id];
+    TouchImage(image);
     image.AssociateDepth(image_id);
 }
@@ -719,6 +744,8 @@ void TextureCache::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
+    total_used_memory += Common::AlignUp(image.info.guest_size, 1024);
+    image.lru_id = lru_cache.Insert(image_id, gc_tick);
     ForEachPage(image.info.guest_address, image.info.guest_size,
                 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 }
@@ -728,6 +755,8 @@ void TextureCache::UnregisterImage(ImageId image_id) {
     ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
                "Trying to unregister an already unregistered image");
     image.flags &= ~ImageFlagBits::Registered;
+    lru_cache.Free(image.lru_id);
+    total_used_memory -= Common::AlignUp(image.info.guest_size, 1024);
     ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) {
         const auto page_it = page_table.find(page);
         if (page_it == nullptr) {
@@ -852,6 +881,77 @@ void TextureCache::UntrackImageTail(ImageId image_id) {
    tracker.UpdatePageWatchers<false>(addr, size);
}

void TextureCache::RunGarbageCollector() {
    SCOPE_EXIT {
        ++gc_tick;
    };
    if (instance.CanReportMemoryUsage()) {
        total_used_memory = instance.GetDeviceMemoryUsage();
    }
    if (total_used_memory < trigger_gc_memory) {
        return;
    }
    std::scoped_lock lock{mutex};
    bool pressured = false;
    bool aggresive = false;
    u64 ticks_to_destroy = 0;
    size_t num_deletions = 0;

    const auto configure = [&](bool allow_aggressive) {
        pressured = total_used_memory >= pressure_gc_memory;
        aggresive = allow_aggressive && total_used_memory >= critical_gc_memory;
        ticks_to_destroy = aggresive ? 160 : pressured ? 80 : 16;
        ticks_to_destroy = std::min(ticks_to_destroy, gc_tick);
        num_deletions = aggresive ? 40 : pressured ? 20 : 10;
    };
    const auto clean_up = [&](ImageId image_id) {
        if (num_deletions == 0) {
            return true;
        }
        --num_deletions;
        auto& image = slot_images[image_id];
        const bool download = image.SafeToDownload();
        const bool linear = image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear;
        if (!linear && download) {
            // This is a workaround for now. We can't handle non-linear image downloads.
            return false;
        }
        if (download && !pressured) {
            return false;
        }
        if (download) {
            DownloadImageMemory(image_id);
        }
        FreeImage(image_id);
        if (total_used_memory < critical_gc_memory) {
            if (aggresive) {
                num_deletions >>= 2;
                aggresive = false;
                return false;
            }
            if (pressured && total_used_memory < pressure_gc_memory) {
                num_deletions >>= 1;
                pressured = false;
            }
        }
        return false;
    };

    // Try to remove anything old enough and not high priority.
    configure(false);
    lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up);

    if (total_used_memory >= critical_gc_memory) {
        // If we are still over the critical limit, run an aggressive GC
        configure(true);
        lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up);
    }
}
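The tiers the configure lambda selects between, spelled out (a summary of the code above, not additional behavior):

// tier       condition                               ticks_to_destroy  num_deletions
// normal     usage >= trigger_gc_memory              16                10
// pressured  usage >= pressure_gc_memory             80                20
// aggresive  usage >= critical_gc_memory (2nd pass)  160               40
// ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up) then visits, oldest
// first, every image left untouched for at least ticks_to_destroy ticks, and
// clean_up frees at most num_deletions of them before asking the walk to stop.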

void TextureCache::TouchImage(const Image& image) {
    lru_cache.Touch(image.lru_id, gc_tick);
}

void TextureCache::DeleteImage(ImageId image_id) {
    Image& image = slot_images[image_id];
    ASSERT_MSG(!image.IsTracked(), "Image was not untracked");
@@ -7,6 +7,7 @@
#include <boost/container/small_vector.hpp>
#include <tsl/robin_map.h>

#include "common/lru_cache.h"
#include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/multi_level_page_table.h"
@@ -37,6 +38,11 @@ DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
static constexpr u32 MaxInvalidateDist = 12_MB;

class TextureCache {
    // Default values for garbage collection
    static constexpr s64 DEFAULT_PRESSURE_GC_MEMORY = 1_GB + 512_MB;
    static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB;
    static constexpr s64 TARGET_GC_THRESHOLD = 8_GB;

    struct Traits {
        using Entry = boost::container::small_vector<ImageId, 16>;
        static constexpr size_t AddressSpaceBits = 40;
@@ -126,6 +132,7 @@ public:
         std::scoped_lock lock{mutex};
         Image& image = slot_images[image_id];
         TrackImage(image_id);
+        TouchImage(image);
         RefreshImage(image, custom_scheduler);
     }
@@ -150,12 +157,18 @@ public:

     /// Retrieves the image with the specified id.
     [[nodiscard]] Image& GetImage(ImageId id) {
-        return slot_images[id];
+        auto& image = slot_images[id];
+        TouchImage(image);
+        return image;
     }

     /// Retrieves the image view with the specified id.
     [[nodiscard]] ImageView& GetImageView(ImageId id) {
-        return slot_image_views[id];
+        auto& view = slot_image_views[id];
+        // Maybe this is not needed.
+        Image& image = slot_images[view.image_id];
+        TouchImage(image);
+        return view;
     }

     /// Registers an image view for provided image
@@ -199,6 +212,9 @@ public:
        return false;
    }

    /// Runs the garbage collector.
    void RunGarbageCollector();

    template <typename Func>
    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
        using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
@@ -287,6 +303,9 @@ private:
    /// Removes the image and any views/surface metas that reference it.
    void DeleteImage(ImageId image_id);

    /// Touch the image in the LRU cache.
    void TouchImage(const Image& image);

    void FreeImage(ImageId image_id) {
        UntrackImage(image_id);
        UnregisterImage(image_id);
@@ -305,6 +324,12 @@ private:
    tsl::robin_map<u64, Sampler> samplers;
    tsl::robin_map<vk::Format, ImageId> null_images;
    std::unordered_set<ImageId> download_images;
    u64 total_used_memory = 0;
    u64 trigger_gc_memory = 0;
    u64 pressure_gc_memory = 0;
    u64 critical_gc_memory = 0;
    u64 gc_tick = 0;
    Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache;
    PageTable page_table;
    std::mutex mutex;