diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e5468dfc9..b880f5ec3 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -180,6 +180,9 @@ ImportedHostBuffer::ImportedHostBuffer(const Vulkan::Instance& instance_, auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci); if (device_memory_result.result != vk::Result::eSuccess) { // May fail to import the host memory if it is backed by a file. (AMD on Linux) + LOG_WARNING(Render_Vulkan, + "Failed to import host memory at {} size {:#x}, Reason: {}", + cpu_addr, size_bytes, vk::to_string(device_memory_result.result)); instance->GetDevice().destroyBuffer(buffer); has_failed = true; return; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e4dc5c5e9..38a874d8f 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -19,9 +19,9 @@ static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t UboStreamBufferSize = 128_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, - PageManager& tracker_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, + Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_, + TextureCache& texture_cache_, PageManager& tracker_) + : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_}, texture_cache{texture_cache_}, tracker{tracker_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, @@ -324,31 +324,38 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { } void BufferCache::MapMemory(VAddr device_addr, u64 size) { - const u64 page_start = device_addr >> BDA_PAGEBITS; - const u64 page_end = Common::DivCeil(device_addr + size, BDA_PAGESIZE); + const u64 page_start = device_addr >> CACHING_PAGEBITS; + const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); + auto interval = decltype(covered_regions)::interval_type::right_open(page_start, page_end); + auto interval_set = boost::icl::interval_set{interval}; + auto uncovered_ranges = interval_set - covered_regions; + if (uncovered_ranges.empty()) { + return; + } // We fill any holes within the given range boost::container::small_vector bda_addrs; - bool importing_failed = false; - u64 range_start = page_start; - u64 range_end = page_start; - const auto import_range = [&]() { - // Import the host memory - void* cpu_addr = reinterpret_cast(range_start << BDA_PAGEBITS); - const u64 range_size = (range_end - range_start) << BDA_PAGEBITS; - ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size, vk::BufferUsageFlagBits::eShaderDeviceAddress | vk::BufferUsageFlagBits::eStorageBuffer); + for (const auto& range : uncovered_ranges) { + // import host memory + const u64 range_start = range.lower(); + const u64 range_end = range.upper(); + void* cpu_addr = reinterpret_cast(range_start << CACHING_PAGEBITS); + const u64 range_size = (range_end - range_start) << CACHING_PAGEBITS; + ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size, + vk::BufferUsageFlagBits::eShaderDeviceAddress | + vk::BufferUsageFlagBits::eStorageBuffer); if (buffer.HasFailed()) { - importing_failed = true; + continue; } // Update BDA page table - u64 bda_addr = buffer.BufferDeviceAddress(); - u64 range = range_end - range_start; + const u64 bda_addr = buffer.BufferDeviceAddress(); + const u64 range_pages = range_end - range_start; bda_addrs.clear(); - bda_addrs.reserve(range); - for (u64 i = 0; i < range; ++i) { + bda_addrs.reserve(range_pages); + for (u64 i = 0; i < range_pages; ++i) { // TODO: we may want to mark the page as host imported // to let the shader know so that it can notify us if it // accesses the page, so we can create a GPU local buffer. - bda_addrs.push_back(bda_addr + (i << BDA_PAGEBITS)); + bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1); } WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(), bda_addrs.size() * sizeof(u64)); @@ -356,34 +363,9 @@ void BufferCache::MapMemory(VAddr device_addr, u64 size) { std::scoped_lock lk{mutex}; imported_buffers.emplace_back(std::move(buffer)); } - }; - for (; range_end < page_end; ++range_end) { - if (!bda_mapped_pages.test(range_end)) { - continue; - } - if (range_start != range_end) { - import_range(); - if (importing_failed) { - break; - } - } - range_start = range_end + 1; + // Mark the pages as covered + covered_regions += range; } - if (!importing_failed && range_start != range_end) { - import_range(); - } - // Mark the pages as mapped - for (u64 page = page_start; page < page_end; ++page) { - bda_mapped_pages.set(page); - } - if (!importing_failed) { - return; - } - // If we failed to import the memory, fall back to copying the whole map - // to GPU memory. - LOG_INFO(Render_Vulkan, "Failed to import host memory at {:#x} size {:#x}, falling back to copying", - device_addr, size); - CreateBuffer(device_addr, size); } BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index dd22269aa..bab6bf98e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -39,14 +39,11 @@ class TextureCache; class BufferCache { public: - static constexpr u32 CACHING_PAGEBITS = 12; + static constexpr u32 CACHING_PAGEBITS = 16; static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; - static constexpr u64 DEVICE_PAGESIZE = 4_KB; - - static constexpr u64 BDA_PAGEBITS = 16; - static constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS; - static constexpr u64 BDA_NUMPAGES = (u64{1} << (u64(40) - BDA_PAGEBITS)); - static constexpr u64 BDA_PAGETABLE_SIZE = BDA_NUMPAGES * sizeof(u64); + static constexpr u64 DEVICE_PAGESIZE = 64_KB; + static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS); + static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(u64); struct Traits { using Entry = BufferId; @@ -65,8 +62,8 @@ public: public: explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, - PageManager& tracker); + Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool, + TextureCache& texture_cache, PageManager& tracker); ~BufferCache(); /// Returns a pointer to GDS device local buffer. @@ -124,8 +121,6 @@ public: [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); - void MapMemory(VAddr device_addr, u64 size); - private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { @@ -169,8 +164,11 @@ private: void DeleteBuffer(BufferId buffer_id); + void MapMemory(VAddr device_addr, u64 size); + const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; + Vulkan::Rasterizer& rasterizer; AmdGpu::Liverpool* liverpool; TextureCache& texture_cache; PageManager& tracker; @@ -178,7 +176,7 @@ private: StreamBuffer stream_buffer; Buffer gds_buffer; Buffer bda_pagetable_buffer; - std::bitset bda_mapped_pages; + boost::icl::interval_set covered_regions; std::vector imported_buffers; std::shared_mutex mutex; Common::SlotVector slot_buffers; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 33ab54f1c..a1bb9af14 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) { Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, page_manager{this}, - buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager}, + buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager}, texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} { if (!Config::nullGpu()) { @@ -946,7 +946,6 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size); } page_manager.OnGpuMap(addr, size); - buffer_cache.MapMemory(addr, size); } void Rasterizer::UnmapMemory(VAddr addr, u64 size) {