diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 07312d655..94fe652e6 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -3,6 +3,7 @@
 #include
 #include "common/alignment.h"
+#include "common/debug.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
 #include "video_core/amdgpu/liverpool.h"
@@ -131,11 +132,22 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
 
 BufferCache::~BufferCache() = default;
 
-void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
+void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
     const bool is_tracked = IsRegionRegistered(device_addr, size);
     if (is_tracked) {
         // Mark the page as CPU modified to stop tracking writes.
         memory_tracker.MarkRegionAsCpuModified(device_addr, size);
+
+        if (unmap) {
+            return;
+        }
+
+        {
+            std::scoped_lock lock(dma_sync_ranges_mutex);
+            const VAddr page_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE);
+            const u64 page_size = Common::AlignUp(device_addr + size, CACHING_PAGESIZE) - page_addr;
+            dma_sync_ranges.Add(page_addr, page_size);
+        }
     }
 }
 
@@ -371,24 +383,10 @@ std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
 }
 
 bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
-    const VAddr end_addr = addr + size;
-    const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
-    for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
-        const BufferId buffer_id = page_table[page].buffer_id;
-        if (!buffer_id) {
-            ++page;
-            continue;
-        }
-        std::shared_lock lk{slot_buffers_mutex};
-        Buffer& buffer = slot_buffers[buffer_id];
-        const VAddr buf_start_addr = buffer.CpuAddr();
-        const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
-        if (buf_start_addr < end_addr && addr < buf_end_addr) {
-            return true;
-        }
-        page = Common::DivCeil(buf_end_addr, CACHING_PAGESIZE);
-    }
-    return false;
+    // buffer_ranges is keyed by caching page index; derive the page count from the end address.
+    const u64 page = addr >> CACHING_PAGEBITS;
+    const u64 page_count = Common::DivCeil(addr + size, CACHING_PAGESIZE) - page;
+    return buffer_ranges.Intersects(page, page_count);
 }
 
 bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
@@ -577,6 +575,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
     Register(new_buffer_id);
+    {
+        std::scoped_lock lk(dma_sync_ranges_mutex);
+        dma_sync_ranges.Add(overlap.begin, overlap.end - overlap.begin);
+    }
     return new_buffer_id;
 }
 
@@ -704,7 +706,6 @@ void BufferCache::ProcessFaultBuffer() {
             // Only create a buffer if the current range doesn't fit in an existing one
            FindBuffer(start, static_cast<u32>(end - start));
         }
-        rasterizer.AddDmaSyncRanges(fault_ranges);
     });
 }
 
@@ -731,6 +732,11 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
             page_table[page].buffer_id = BufferId{};
         }
     }
+    if constexpr (insert) {
+        buffer_ranges.Add(page_begin, page_end - page_begin, buffer_id);
+    } else {
+        buffer_ranges.Subtract(page_begin, page_end - page_begin);
+    }
 }
 
 void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
@@ -915,6 +921,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     }
     VAddr device_addr_end = device_addr + size;
     ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+        RENDERER_TRACE;
         // Note that this function synchronizes the whole buffer, not just the range.
         // This is because this function is used to sync buffers before using a
         // shader that uses DMA.
@@ -924,6 +931,16 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
+void BufferCache::SynchronizeDmaBuffers() {
+    RENDERER_TRACE;
+    std::scoped_lock lk(dma_sync_ranges_mutex);
+    dma_sync_ranges.ForEach([&](VAddr device_addr, u64 end_addr) {
+        RENDERER_TRACE;
+        SynchronizeBuffersInRange(device_addr, end_addr - device_addr);
+    });
+    dma_sync_ranges.Clear();
+}
+
 void BufferCache::MemoryBarrier() {
     // Vulkan doesn't know which buffer we access in a shader if we use
     // BufferDeviceAddress. We need a full memory barrier.
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 335764183..054e61db5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -98,7 +98,7 @@ public:
     }
 
     /// Invalidates any buffer in the logical page range.
-    void InvalidateMemory(VAddr device_addr, u64 size);
+    void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
 
     /// Binds host vertex buffers for the current draw.
     void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -139,25 +139,21 @@ public:
     /// Synchronizes all buffers in the specified range.
     void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
 
+    /// Synchronizes all buffers needed for DMA.
+    void SynchronizeDmaBuffers();
+
     /// Record memory barrier. Used for buffers when accessed via BDA.
     void MemoryBarrier();
 
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
-        const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
-        for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
-            const BufferId buffer_id = page_table[page].buffer_id;
-            if (!buffer_id) {
-                ++page;
-                continue;
-            }
-            Buffer& buffer = slot_buffers[buffer_id];
-            func(buffer_id, buffer);
-
-            const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
-            page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
-        }
+        const u64 page = device_addr >> CACHING_PAGEBITS;
+        const u64 page_count = Common::DivCeil(device_addr + size, CACHING_PAGESIZE) - page;
+        buffer_ranges.ForEachInRange(page, page_count, [&](u64 page_start, u64 page_end, BufferId id) {
+            Buffer& buffer = slot_buffers[id];
+            func(id, buffer);
+        });
     }
 
     void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
@@ -199,7 +195,10 @@ private:
     Buffer fault_buffer;
     std::shared_mutex slot_buffers_mutex;
     Common::SlotVector<Buffer> slot_buffers;
+    std::shared_mutex dma_sync_ranges_mutex;
+    RangeSet dma_sync_ranges;
     RangeSet gpu_modified_ranges;
+    SplitRangeMap<BufferId> buffer_ranges;
     MemoryTracker memory_tracker;
     PageTable page_table;
     vk::UniqueDescriptorSetLayout fault_process_desc_layout;
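The page-index arithmetic used above is easy to get wrong, so here is a small self-contained sketch (not part of the patch; `kPageBits`, `kPageSize` and `ToPageSpan` are illustrative stand-ins for the cache's `CACHING_PAGEBITS`/`CACHING_PAGESIZE`) of how a byte range maps onto the page keys that `IsRegionRegistered`, `ForEachBufferInRange` and `ChangeRegister` now share:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative stand-ins for CACHING_PAGEBITS / CACHING_PAGESIZE; the real
// constants are defined by the buffer cache.
constexpr std::uint64_t kPageBits = 12;
constexpr std::uint64_t kPageSize = std::uint64_t{1} << kPageBits;

struct PageSpan {
    std::uint64_t first_page; // index of the first caching page touched
    std::uint64_t page_count; // number of caching pages covered
};

// Map a byte range [addr, addr + size) onto the page-index key space used by
// buffer_ranges. The count is derived from the *end address*, so a start
// address in the middle of a page still accounts for the page it spills into.
constexpr PageSpan ToPageSpan(std::uint64_t addr, std::size_t size) {
    const std::uint64_t first = addr >> kPageBits;
    const std::uint64_t end_page = (addr + size + kPageSize - 1) >> kPageBits;
    return {first, end_page - first};
}

static_assert(ToPageSpan(0x1000, 0x1000).page_count == 1, "aligned, page-sized range covers 1 page");
static_assert(ToPageSpan(0x1FFF, 2).page_count == 2, "2 bytes across a boundary cover 2 pages");
static_assert(ToPageSpan(0x1FFF, 2).first_page == 1, "the first page index comes from the start address");
```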
diff --git a/src/video_core/buffer_cache/range_set.h b/src/video_core/buffer_cache/range_set.h
index 2abf6e524..1b91fb893 100644
--- a/src/video_core/buffer_cache/range_set.h
+++ b/src/video_core/buffer_cache/range_set.h
@@ -4,6 +4,9 @@
 #pragma once
 
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -38,6 +41,22 @@ struct RangeSet {
         m_ranges_set.subtract(interval);
     }
 
+    void Clear() {
+        m_ranges_set.clear();
+    }
+
+    bool Contains(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::contains(m_ranges_set, interval);
+    }
+
+    bool Intersects(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::intersects(m_ranges_set, interval);
+    }
+
     template <typename Func>
     void ForEach(Func&& func) const {
         if (m_ranges_set.empty()) {
@@ -77,14 +96,29 @@ struct RangeSet {
         }
     }
 
+    template <typename Func>
+    void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
+        const VAddr end_addr = base_addr + size;
+        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end) {
+            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
+                func(base_addr, gap_size);
+            }
+            base_addr = range_end;
+        });
+        if (base_addr != end_addr) {
+            func(base_addr, end_addr - base_addr);
+        }
+    }
+
     IntervalSet m_ranges_set;
 };
 
+template <typename T>
 class RangeMap {
 public:
     using IntervalMap =
-        boost::icl::interval_map<VAddr, u64>;
+        boost::icl::interval_map<VAddr, T>;
     using IntervalType = typename IntervalMap::interval_type;
@@ -99,7 +133,7 @@ public:
     RangeMap(RangeMap&& other);
     RangeMap& operator=(RangeMap&& other);
 
-    void Add(VAddr base_address, size_t size, u64 value) {
+    void Add(VAddr base_address, size_t size, const T& value) {
         const VAddr end_address = base_address + size;
         IntervalType interval{base_address, end_address};
         m_ranges_map.add({interval, value});
@@ -111,6 +145,35 @@ public:
         m_ranges_map -= interval;
     }
 
+    void Clear() {
+        m_ranges_map.clear();
+    }
+
+    bool Contains(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::contains(m_ranges_map, interval);
+    }
+
+    bool Intersects(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::intersects(m_ranges_map, interval);
+    }
+
+    template <typename Func>
+    void ForEach(Func&& func) const {
+        if (m_ranges_map.empty()) {
+            return;
+        }
+
+        for (const auto& [interval, value] : m_ranges_map) {
+            const VAddr inter_addr_end = interval.upper();
+            const VAddr inter_addr = interval.lower();
+            func(inter_addr, inter_addr_end, value);
+        }
+    }
+
     template <typename Func>
     void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const {
         if (m_ranges_map.empty()) {
@@ -140,7 +203,112 @@ public:
     template <typename Func>
     void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
         const VAddr end_addr = base_addr + size;
-        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, u64) {
+        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, const T&) {
+            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
+                func(base_addr, gap_size);
+            }
+            base_addr = range_end;
+        });
+        if (base_addr != end_addr) {
+            func(base_addr, end_addr - base_addr);
+        }
+    }
+
+private:
+    IntervalMap m_ranges_map;
+};
+
+template <typename T>
+class SplitRangeMap {
+public:
+    using IntervalMap =
+        boost::icl::split_interval_map<VAddr, T>;
+    using IntervalType = typename IntervalMap::interval_type;
+
+public:
+    SplitRangeMap() = default;
+    ~SplitRangeMap() = default;
+
+    SplitRangeMap(SplitRangeMap const&) = delete;
+    SplitRangeMap& operator=(SplitRangeMap const&) = delete;
+
+    SplitRangeMap(SplitRangeMap&& other);
+    SplitRangeMap& operator=(SplitRangeMap&& other);
+
+    void Add(VAddr base_address, size_t size, const T& value) {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        m_ranges_map.add({interval, value});
+    }
+
+    void Subtract(VAddr base_address, size_t size) {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        m_ranges_map -= interval;
+    }
+
+    void Clear() {
+        m_ranges_map.clear();
+    }
+
+    bool Contains(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::contains(m_ranges_map, interval);
+    }
+
+    bool Intersects(VAddr base_address, size_t size) const {
+        const VAddr end_address = base_address + size;
+        IntervalType interval{base_address, end_address};
+        return boost::icl::intersects(m_ranges_map, interval);
+    }
+
+    template <typename Func>
+    void ForEach(Func&& func) const {
+        if (m_ranges_map.empty()) {
+            return;
+        }
+
+        for (const auto& [interval, value] : m_ranges_map) {
+            const VAddr inter_addr_end = interval.upper();
+            const VAddr inter_addr = interval.lower();
+            func(inter_addr, inter_addr_end, value);
+        }
+    }
+
+    template <typename Func>
+    void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const {
+        if (m_ranges_map.empty()) {
+            return;
+        }
+        const VAddr start_address = base_addr;
+        const VAddr end_address = start_address + size;
+        const IntervalType search_interval{start_address, end_address};
+        auto it = m_ranges_map.lower_bound(search_interval);
+        if (it == m_ranges_map.end()) {
+            return;
+        }
+        auto end_it = m_ranges_map.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            VAddr inter_addr_end = it->first.upper();
+            VAddr inter_addr = it->first.lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
+    template <typename Func>
+    void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
+        const VAddr end_addr = base_addr + size;
+        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, const T&) {
+            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
+                func(base_addr, gap_size);
+            }
+            base_addr = range_end;
+        });
         if (base_addr != end_addr) {
             func(base_addr, end_addr - base_addr);
         }
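As an aside on the container choice (again not part of the patch, with a plain integer standing in for BufferId): boost.ICL's joining `interval_map` coalesces touching intervals that carry equal values, while `split_interval_map` preserves the borders of every insertion, which is what lets `buffer_ranges` keep each registered page span as its own entry:

```cpp
#include <boost/icl/interval_map.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <cstdint>
#include <iostream>

int main() {
    using Iv = boost::icl::interval<std::uint64_t>;

    boost::icl::interval_map<std::uint64_t, std::uint64_t> joining;
    boost::icl::split_interval_map<std::uint64_t, std::uint64_t> splitting;

    // Two touching page spans carrying the same value.
    joining.add({Iv::right_open(0, 4), 7});
    joining.add({Iv::right_open(4, 8), 7});
    splitting.add({Iv::right_open(0, 4), 7});
    splitting.add({Iv::right_open(4, 8), 7});

    // The joining map merges them into one interval; the split map keeps
    // both insertions (and therefore both registrations) distinct.
    std::cout << joining.iterative_size() << '\n';   // 1
    std::cout << splitting.iterative_size() << '\n'; // 2
}
```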
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index d254d1e0c..2cbf0fec2 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -477,10 +477,9 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
     if (uses_dma) {
         // We only use fault buffer for DMA right now.
         {
-            Common::RecursiveSharedLock lock(mapped_ranges_mutex);
-            for (const auto& range : dma_sync_mapped_ranges) {
-                buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
-            }
+            // We don't want the mapped ranges to be modified while we are syncing
+            Common::RecursiveSharedLock lock{mapped_ranges_mutex};
+            buffer_cache.SynchronizeDmaBuffers();
         }
         buffer_cache.MemoryBarrier();
     }
@@ -726,14 +725,6 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
     }
 }
 
-void Rasterizer::AddDmaSyncRanges(const boost::icl::interval_set<VAddr>& ranges) {
-    dma_sync_ranges += ranges;
-    {
-        std::scoped_lock lock(mapped_ranges_mutex);
-        dma_sync_mapped_ranges = mapped_ranges & dma_sync_ranges;
-    }
-}
-
 void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
     int cb_index = 0;
     for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) {
@@ -964,7 +955,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
         // Not GPU mapped memory, can skip invalidation logic entirely.
         return false;
     }
-    buffer_cache.InvalidateMemory(addr, size);
+    buffer_cache.InvalidateMemory(addr, size, false);
     texture_cache.InvalidateMemory(addr, size);
     return true;
 }
@@ -984,19 +975,17 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
     {
         std::scoped_lock lock{mapped_ranges_mutex};
         mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-        dma_sync_mapped_ranges = mapped_ranges & dma_sync_ranges;
     }
     page_manager.OnGpuMap(addr, size);
 }
 
 void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
-    buffer_cache.InvalidateMemory(addr, size);
+    buffer_cache.InvalidateMemory(addr, size, true);
     texture_cache.UnmapMemory(addr, size);
     page_manager.OnGpuUnmap(addr, size);
     {
         std::scoped_lock lock{mapped_ranges_mutex};
         mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-        dma_sync_mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
     }
 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index fc167bdbd..647f3333b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -107,9 +107,6 @@ private:
     }
 
     bool IsComputeMetaClear(const Pipeline* pipeline);
-
-    void AddDmaSyncRanges(const boost::icl::interval_set<VAddr>& ranges);
-
 private:
     friend class VideoCore::BufferCache;
 
@@ -121,8 +118,6 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
    boost::icl::interval_set<VAddr> mapped_ranges;
-    boost::icl::interval_set<VAddr> dma_sync_ranges;
-    boost::icl::interval_set<VAddr> dma_sync_mapped_ranges;
     std::shared_mutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;
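To close, a stand-alone model (illustrative names only, not the emulator's types) of the dma_sync_ranges flow that replaces the removed rasterizer-side bookkeeping: invalidations accumulate into an interval set, overlapping writes coalesce, and the next DMA-using draw walks the merged regions once and clears the set, which is what BufferCache::SynchronizeDmaBuffers does via RangeSet::ForEach and Clear:

```cpp
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <iostream>

using VAddr = std::uint64_t;

int main() {
    boost::icl::interval_set<VAddr> dirty;
    auto mark = [&](VAddr addr, VAddr size) {
        dirty += boost::icl::interval<VAddr>::right_open(addr, addr + size);
    };

    mark(0x1000, 0x100); // CPU write
    mark(0x1080, 0x200); // overlapping write -> merged with the first range
    mark(0x4000, 0x10);  // disjoint write -> second range

    // Drain step, analogous to SynchronizeDmaBuffers: visit each merged
    // region once, then forget everything until the next round of writes.
    for (const auto& region : dirty) {
        std::cout << std::hex << "sync 0x" << region.lower() << "..0x" << region.upper() << '\n';
    }
    dirty.clear();
}
```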