buffer_cache: Do not cache buffer views

This commit is contained in:
IndecisiveTurtle 2024-09-04 12:45:28 +03:00
parent cb66b92854
commit 3c0e11f606
3 changed files with 26 additions and 42 deletions

View File

@ -91,10 +91,10 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa
buffer = vk::Buffer{unsafe_buffer}; buffer = vk::Buffer{unsafe_buffer};
} }
Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_, Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, MemoryUsage usage_,
vk::BufferUsageFlags flags, u64 size_bytes_) VAddr cpu_addr_, vk::BufferUsageFlags flags, u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_}, : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_},
buffer{instance->GetDevice(), instance->GetAllocator()} { usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} {
// Create buffer object. // Create buffer object.
const vk::BufferCreateInfo buffer_ci = { const vk::BufferCreateInfo buffer_ci = {
.size = size_bytes, .size = size_bytes,
@ -117,13 +117,6 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt) { AmdGpu::NumberFormat nfmt) {
const auto it{std::ranges::find_if(views, [=](const BufferView& view) {
return offset == view.offset && size == view.size && is_written == view.is_written &&
dfmt == view.dfmt && nfmt == view.nfmt;
})};
if (it != views.end()) {
return *it->handle;
}
const vk::BufferUsageFlags2CreateInfoKHR usage_flags = { const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
.usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
: vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer, : vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer,
@ -135,23 +128,18 @@ vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataF
.offset = offset, .offset = offset,
.range = size, .range = size,
}; };
views.push_back({ const auto view = instance->GetDevice().createBufferView(view_ci);
.offset = offset, scheduler->DeferOperation(
.size = size, [view, device = instance->GetDevice()] { device.destroyBufferView(view); });
.is_written = is_written, return view;
.dfmt = dfmt,
.nfmt = nfmt,
.handle = instance->GetDevice().createBufferViewUnique(view_ci),
});
return *views.back().handle;
} }
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_, StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, u64 size_bytes) MemoryUsage usage, u64 size_bytes)
: Buffer{instance, usage, 0, AllFlags, size_bytes}, scheduler{scheduler_} { : Buffer{instance, scheduler, usage, 0, AllFlags, size_bytes} {
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
const auto device = instance.GetDevice(); const auto device = instance.GetDevice();
@ -206,7 +194,7 @@ void StreamBuffer::Commit() {
auto& watch = current_watches[current_watch_cursor++]; auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset; watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick(); watch.tick = scheduler->CurrentTick();
} }
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
@ -220,7 +208,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor]; auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound; wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick); scheduler->Wait(watch.tick);
++wait_cursor; ++wait_cursor;
} }
} }

View File

@ -73,8 +73,9 @@ struct UniqueBuffer {
class Buffer { class Buffer {
public: public:
explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_, explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
vk::BufferUsageFlags flags, u64 size_bytes_); MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
u64 size_bytes_);
Buffer& operator=(const Buffer&) = delete; Buffer& operator=(const Buffer&) = delete;
Buffer(const Buffer&) = delete; Buffer(const Buffer&) = delete;
@ -144,20 +145,12 @@ public:
int stream_score = 0; int stream_score = 0;
size_t size_bytes = 0; size_t size_bytes = 0;
std::span<u8> mapped_data; std::span<u8> mapped_data;
const Vulkan::Instance* instance{}; const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
MemoryUsage usage; MemoryUsage usage;
UniqueBuffer buffer; UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
struct BufferView {
u32 offset;
u32 size;
bool is_written;
AmdGpu::DataFormat dfmt;
AmdGpu::NumberFormat nfmt;
vk::UniqueBufferView handle;
};
std::vector<BufferView> views;
}; };
class StreamBuffer : public Buffer { class StreamBuffer : public Buffer {
@ -196,7 +189,6 @@ private:
void WaitPendingOperations(u64 requested_upper_bound); void WaitPendingOperations(u64 requested_upper_bound);
private: private:
Vulkan::Scheduler& scheduler;
u64 offset{}; u64 offset{};
u64 mapped_size{}; u64 mapped_size{};
std::vector<Watch> current_watches; std::vector<Watch> current_watches;

View File

@ -27,7 +27,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
memory_tracker{&tracker} { memory_tracker{&tracker} {
// Ensure the first slot is used for the null buffer // Ensure the first slot is used for the null buffer
void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, ReadFlags, 1)); void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
} }
BufferCache::~BufferCache() = default; BufferCache::~BufferCache() = default;
@ -236,7 +236,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
bool is_texel_buffer) { bool is_texel_buffer) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE; static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
if (!is_written && !is_texel_buffer && size <= StreamThreshold && !is_gpu_dirty) { if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
// For small uniform buffers that have not been modified by gpu // For small uniform buffers that have not been modified by gpu
// use device local stream buffer to reduce renderpass breaks. // use device local stream buffer to reduce renderpass breaks.
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
@ -424,8 +424,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
wanted_size = static_cast<u32>(device_addr_end - device_addr); wanted_size = static_cast<u32>(device_addr_end - device_addr);
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin); const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = const BufferId new_buffer_id = slot_buffers.insert(
slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
auto& new_buffer = slot_buffers[new_buffer_id]; auto& new_buffer = slot_buffers[new_buffer_id];
const size_t size_bytes = new_buffer.SizeBytes(); const size_t size_bytes = new_buffer.SizeBytes();
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
@ -505,7 +505,11 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
} else { } else {
// For large one time transfers use a temporary host buffer. // For large one time transfers use a temporary host buffer.
// RenderDoc can lag quite a bit if the stream buffer is too large. // RenderDoc can lag quite a bit if the stream buffer is too large.
Buffer temp_buffer{instance, MemoryUsage::Upload, 0, vk::BufferUsageFlagBits::eTransferSrc, Buffer temp_buffer{instance,
scheduler,
MemoryUsage::Upload,
0,
vk::BufferUsageFlagBits::eTransferSrc,
total_size_bytes}; total_size_bytes};
src_buffer = temp_buffer.Handle(); src_buffer = temp_buffer.Handle();
u8* const staging = temp_buffer.mapped_data.data(); u8* const staging = temp_buffer.mapped_data.data();