From 3c0e11f606f145dec91391f42e2a3790bab63610 Mon Sep 17 00:00:00 2001
From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:45:28 +0300
Subject: [PATCH] buffer_cache: Do not cache buffer views

---
 src/video_core/buffer_cache/buffer.cpp       | 36 +++++++-------------
 src/video_core/buffer_cache/buffer.h         | 18 +++-------
 src/video_core/buffer_cache/buffer_cache.cpp | 14 +++++---
 3 files changed, 26 insertions(+), 42 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
index adcea000b..702958034 100644
--- a/src/video_core/buffer_cache/buffer.cpp
+++ b/src/video_core/buffer_cache/buffer.cpp
@@ -91,10 +91,10 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa
     buffer = vk::Buffer{unsafe_buffer};
 }
 
-Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_,
-               vk::BufferUsageFlags flags, u64 size_bytes_)
-    : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_},
-      buffer{instance->GetDevice(), instance->GetAllocator()} {
+Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, MemoryUsage usage_,
+               VAddr cpu_addr_, vk::BufferUsageFlags flags, u64 size_bytes_)
+    : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_},
+      usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} {
     // Create buffer object.
     const vk::BufferCreateInfo buffer_ci = {
         .size = size_bytes,
@@ -117,13 +117,6 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
 
 vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
                             AmdGpu::NumberFormat nfmt) {
-    const auto it{std::ranges::find_if(views, [=](const BufferView& view) {
-        return offset == view.offset && size == view.size && is_written == view.is_written &&
-               dfmt == view.dfmt && nfmt == view.nfmt;
-    })};
-    if (it != views.end()) {
-        return *it->handle;
-    }
     const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
         .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
                             : vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer,
@@ -135,23 +128,18 @@ vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataF
         .offset = offset,
         .range = size,
     };
-    views.push_back({
-        .offset = offset,
-        .size = size,
-        .is_written = is_written,
-        .dfmt = dfmt,
-        .nfmt = nfmt,
-        .handle = instance->GetDevice().createBufferViewUnique(view_ci),
-    });
-    return *views.back().handle;
+    const auto view = instance->GetDevice().createBufferView(view_ci);
+    scheduler->DeferOperation(
+        [view, device = instance->GetDevice()] { device.destroyBufferView(view); });
+    return view;
 }
 
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
-StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
+StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                            MemoryUsage usage, u64 size_bytes)
-    : Buffer{instance, usage, 0, AllFlags, size_bytes}, scheduler{scheduler_} {
+    : Buffer{instance, scheduler, usage, 0, AllFlags, size_bytes} {
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
     const auto device = instance.GetDevice();
@@ -206,7 +194,7 @@ void StreamBuffer::Commit() {
 
     auto& watch = current_watches[current_watch_cursor++];
     watch.upper_bound = offset;
-    watch.tick = scheduler.CurrentTick();
+    watch.tick = scheduler->CurrentTick();
 }
 
 void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
@@ -220,7 +208,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
     while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
         auto& watch = previous_watches[wait_cursor];
         wait_bound = watch.upper_bound;
-        scheduler.Wait(watch.tick);
+        scheduler->Wait(watch.tick);
         ++wait_cursor;
     }
 }
diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h
index 334975788..403d4ed85 100644
--- a/src/video_core/buffer_cache/buffer.h
+++ b/src/video_core/buffer_cache/buffer.h
@@ -73,8 +73,9 @@ struct UniqueBuffer {
 
 class Buffer {
 public:
-    explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_,
-                    vk::BufferUsageFlags flags, u64 size_bytes_);
+    explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
+                    MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
+                    u64 size_bytes_);
     Buffer& operator=(const Buffer&) = delete;
     Buffer(const Buffer&) = delete;
 
@@ -144,20 +145,12 @@ public:
     int stream_score = 0;
     size_t size_bytes = 0;
     std::span<u8> mapped_data;
-    const Vulkan::Instance* instance{};
+    const Vulkan::Instance* instance;
+    Vulkan::Scheduler* scheduler;
     MemoryUsage usage;
     UniqueBuffer buffer;
     vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
     vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
-    struct BufferView {
-        u32 offset;
-        u32 size;
-        bool is_written;
-        AmdGpu::DataFormat dfmt;
-        AmdGpu::NumberFormat nfmt;
-        vk::UniqueBufferView handle;
-    };
-    std::vector<BufferView> views;
 };
 
 class StreamBuffer : public Buffer {
@@ -196,7 +189,6 @@ private:
     void WaitPendingOperations(u64 requested_upper_bound);
 
 private:
-    Vulkan::Scheduler& scheduler;
     u64 offset{};
     u64 mapped_size{};
     std::vector<Watch> current_watches;
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 25a15f694..6a32e8fb5 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -27,7 +27,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       memory_tracker{&tracker} {
     // Ensure the first slot is used for the null buffer
-    void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
+    void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
 }
 
 BufferCache::~BufferCache() = default;
@@ -236,7 +236,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
                                                   bool is_texel_buffer) {
     static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
     const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
-    if (!is_written && !is_texel_buffer && size <= StreamThreshold && !is_gpu_dirty) {
+    if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
        // For small uniform buffers that have not been modified by gpu
        // use device local stream buffer to reduce renderpass breaks.
        const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
@@ -424,8 +424,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     wanted_size = static_cast<u32>(device_addr_end - device_addr);
     const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
-    const BufferId new_buffer_id =
-        slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
+    const BufferId new_buffer_id = slot_buffers.insert(
+        instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
@@ -505,7 +505,11 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
     } else {
         // For large one time transfers use a temporary host buffer.
         // RenderDoc can lag quite a bit if the stream buffer is too large.
-        Buffer temp_buffer{instance, MemoryUsage::Upload, 0, vk::BufferUsageFlagBits::eTransferSrc,
+        Buffer temp_buffer{instance,
+                           scheduler,
+                           MemoryUsage::Upload,
+                           0,
+                           vk::BufferUsageFlagBits::eTransferSrc,
                            total_size_bytes};
         src_buffer = temp_buffer.Handle();
         u8* const staging = temp_buffer.mapped_data.data();