buffer_cache: Do not cache buffer views

This commit is contained in:
IndecisiveTurtle 2024-09-04 12:45:28 +03:00
parent cb66b92854
commit 3c0e11f606
3 changed files with 26 additions and 42 deletions

View File

@ -91,10 +91,10 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa
buffer = vk::Buffer{unsafe_buffer};
}
Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_,
vk::BufferUsageFlags flags, u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_},
buffer{instance->GetDevice(), instance->GetAllocator()} {
Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, MemoryUsage usage_,
VAddr cpu_addr_, vk::BufferUsageFlags flags, u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_},
usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} {
// Create buffer object.
const vk::BufferCreateInfo buffer_ci = {
.size = size_bytes,
@ -117,13 +117,6 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt) {
const auto it{std::ranges::find_if(views, [=](const BufferView& view) {
return offset == view.offset && size == view.size && is_written == view.is_written &&
dfmt == view.dfmt && nfmt == view.nfmt;
})};
if (it != views.end()) {
return *it->handle;
}
const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
.usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
: vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer,
@ -135,23 +128,18 @@ vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataF
.offset = offset,
.range = size,
};
views.push_back({
.offset = offset,
.size = size,
.is_written = is_written,
.dfmt = dfmt,
.nfmt = nfmt,
.handle = instance->GetDevice().createBufferViewUnique(view_ci),
});
return *views.back().handle;
const auto view = instance->GetDevice().createBufferView(view_ci);
scheduler->DeferOperation(
[view, device = instance->GetDevice()] { device.destroyBufferView(view); });
return view;
}
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, u64 size_bytes)
: Buffer{instance, usage, 0, AllFlags, size_bytes}, scheduler{scheduler_} {
: Buffer{instance, scheduler, usage, 0, AllFlags, size_bytes} {
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
const auto device = instance.GetDevice();
@ -206,7 +194,7 @@ void StreamBuffer::Commit() {
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
watch.tick = scheduler->CurrentTick();
}
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
@ -220,7 +208,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
scheduler->Wait(watch.tick);
++wait_cursor;
}
}

View File

@ -73,8 +73,9 @@ struct UniqueBuffer {
class Buffer {
public:
explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_,
vk::BufferUsageFlags flags, u64 size_bytes_);
explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
u64 size_bytes_);
Buffer& operator=(const Buffer&) = delete;
Buffer(const Buffer&) = delete;
@ -144,20 +145,12 @@ public:
int stream_score = 0;
size_t size_bytes = 0;
std::span<u8> mapped_data;
const Vulkan::Instance* instance{};
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
MemoryUsage usage;
UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
struct BufferView {
u32 offset;
u32 size;
bool is_written;
AmdGpu::DataFormat dfmt;
AmdGpu::NumberFormat nfmt;
vk::UniqueBufferView handle;
};
std::vector<BufferView> views;
};
class StreamBuffer : public Buffer {
@ -196,7 +189,6 @@ private:
void WaitPendingOperations(u64 requested_upper_bound);
private:
Vulkan::Scheduler& scheduler;
u64 offset{};
u64 mapped_size{};
std::vector<Watch> current_watches;

View File

@ -27,7 +27,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
memory_tracker{&tracker} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
}
BufferCache::~BufferCache() = default;
@ -236,7 +236,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
bool is_texel_buffer) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
if (!is_written && !is_texel_buffer && size <= StreamThreshold && !is_gpu_dirty) {
if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
// For small uniform buffers that have not been modified by gpu
// use device local stream buffer to reduce renderpass breaks.
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
@ -424,8 +424,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
wanted_size = static_cast<u32>(device_addr_end - device_addr);
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id =
slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
const BufferId new_buffer_id = slot_buffers.insert(
instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
auto& new_buffer = slot_buffers[new_buffer_id];
const size_t size_bytes = new_buffer.SizeBytes();
const auto cmdbuf = scheduler.CommandBuffer();
@ -505,7 +505,11 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
} else {
// For large one time transfers use a temporary host buffer.
// RenderDoc can lag quite a bit if the stream buffer is too large.
Buffer temp_buffer{instance, MemoryUsage::Upload, 0, vk::BufferUsageFlagBits::eTransferSrc,
Buffer temp_buffer{instance,
scheduler,
MemoryUsage::Upload,
0,
vk::BufferUsageFlagBits::eTransferSrc,
total_size_bytes};
src_buffer = temp_buffer.Handle();
u8* const staging = temp_buffer.mapped_data.data();