diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 2abbcd22b..5187a8ca3 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -36,7 +36,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE}, fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE), - memory_tracker{&tracker} { + memory_tracker{tracker} { Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(), "BDA Page Table Buffer"); diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index d9166b11c..410b2c69d 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -19,11 +19,11 @@ public: static constexpr size_t MANAGER_POOL_SIZE = 32; public: - explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} + explicit MemoryTracker(PageManager& tracker_) : tracker{&tracker_} {} ~MemoryTracker() = default; /// Returns true if a region has been modified from the CPU - [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); @@ -31,7 +31,7 @@ public: } /// Returns true if a region has been modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); @@ -57,8 +57,7 @@ public: } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - template - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { manager->template ForEachModifiedRange( @@ -67,17 +66,12 @@ public: } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified - template - void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { - if constexpr (clear) { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } else { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); }); } diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 5ad724f96..e912b4f79 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -10,8 +10,9 @@ #ifdef __linux__ #include "common/adaptive_mutex.h" -#endif +#else #include "common/spin_lock.h" +#endif #include "common/types.h" #include "video_core/page_manager.h" @@ -56,7 +57,7 @@ public: return cpu_addr; } - static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { + static constexpr u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { constexpr size_t number_bits = sizeof(u64) * 8; const size_t limit_page_end = number_bits - std::min(page_end, number_bits); u64 bits = (word >> page_start) << page_start; @@ -64,7 +65,7 @@ public: return bits; } - static std::pair GetWordPage(VAddr address) { + static constexpr std::pair GetWordPage(VAddr address) { const size_t converted_address = static_cast(address); const size_t word_number = converted_address / BYTES_PER_WORD; const size_t amount_pages = converted_address % BYTES_PER_WORD; @@ -104,13 +105,12 @@ public: } } - template - void IteratePages(u64 mask, Func&& func) const { + void IteratePages(u64 mask, auto&& func) const { size_t offset = 0; while (mask != 0) { const size_t empty_bits = std::countr_zero(mask); offset += empty_bits; - mask = mask >> empty_bits; + mask >>= empty_bits; const size_t continuous_bits = std::countr_one(mask); func(offset, continuous_bits); @@ -155,8 +155,8 @@ public: * @param size Size in bytes of the CPU range to loop over * @param func Function to call for each turned off region */ - template - void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + template + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { std::scoped_lock lk{lock}; static_assert(type != Type::Untracked); @@ -177,11 +177,9 @@ public: if constexpr (clear) { if constexpr (type == Type::CPU) { UpdateProtection(index, untracked[index], mask); - } - state_words[index] &= ~mask; - if constexpr (type == Type::CPU) { untracked[index] &= ~mask; } + state_words[index] &= ~mask; } const size_t base_offset = index * PAGES_PER_WORD; IteratePages(word, [&](size_t pages_offset, size_t pages_size) { @@ -245,11 +243,12 @@ private: */ template void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const { + constexpr s32 delta = add_to_tracker ? 1 : -1; u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { - tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, - add_to_tracker ? 1 : -1); + tracker->UpdatePageWatchers(addr + offset * BYTES_PER_PAGE, + size * BYTES_PER_PAGE); }); } diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 47ed9e543..57f6b64f6 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -1,11 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include -#include "common/alignment.h" #include "common/assert.h" -#include "common/error.h" #include "common/signal_context.h" #include "core/memory.h" #include "core/signals.h" @@ -15,8 +12,10 @@ #ifndef _WIN64 #include #ifdef ENABLE_USERFAULTFD +#include #include #include +#include "common/error.h" #include #include #endif @@ -24,14 +23,43 @@ #include #endif +#ifdef __linux__ +#include "common/adaptive_mutex.h" +#else +#include "common/spin_lock.h" +#endif + namespace VideoCore { -constexpr size_t PAGESIZE = 4_KB; -constexpr size_t PAGEBITS = 12; +constexpr size_t PAGE_SIZE = 4_KB; +constexpr size_t PAGE_BITS = 12; -#ifdef ENABLE_USERFAULTFD struct PageManager::Impl { - Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { + struct PageState { + u8 num_watchers{}; + + Core::MemoryPermission Perm() const noexcept { + return num_watchers == 0 ? Core::MemoryPermission::ReadWrite + : Core::MemoryPermission::Read; + } + + template + u8 AddDelta() { + if constexpr (delta == 1) { + return ++num_watchers; + } else { + ASSERT_MSG(num_watchers > 0, "Not enough watchers"); + return --num_watchers; + } + } + }; + + static constexpr size_t ADDRESS_BITS = 40; + static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS); + inline static Vulkan::Rasterizer* rasterizer; +#ifdef ENABLE_USERFAULTFD + Impl(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg()); @@ -63,7 +91,8 @@ struct PageManager::Impl { ASSERT_MSG(ret != -1, "Uffdio unregister failed"); } - void Protect(VAddr address, size_t size, bool allow_write) { + void Protect(VAddr address, size_t size, Core::MemoryPermission perms) { + bool allow_write = True(perms & Core::MemoryPermission::Write); uffdio_writeprotect wp; wp.range.start = address; wp.range.len = size; @@ -118,12 +147,9 @@ struct PageManager::Impl { } } - Vulkan::Rasterizer* rasterizer; std::jthread ufd_thread; int uffd; -}; #else -struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; @@ -141,12 +167,10 @@ struct PageManager::Impl { // No-op } - void Protect(VAddr address, size_t size, bool allow_write) { + void Protect(VAddr address, size_t size, Core::MemoryPermission perms) { auto* memory = Core::Memory::Instance(); auto& impl = memory->GetAddressSpace(); - impl.Protect(address, size, - allow_write ? Core::MemoryPermission::ReadWrite - : Core::MemoryPermission::Read); + impl.Protect(address, size, perms); } static bool GuestFaultSignalHandler(void* context, void* fault_address) { @@ -157,23 +181,65 @@ struct PageManager::Impl { return false; } - inline static Vulkan::Rasterizer* rasterizer; -}; #endif + template + void UpdatePageWatchers(VAddr addr, u64 size) { + std::scoped_lock lk(lock); + + size_t page = addr >> PAGE_BITS; + auto perms = cached_pages[page].Perm(); + u64 range_begin = 0; + u64 range_bytes = 0; + + const auto release_pending = [&] { + if (range_bytes > 0) { + Protect(range_begin << PAGE_BITS, range_bytes, perms); + range_bytes = 0; + } + }; + + // Iterate requested pages + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + for (; page != page_end; ++page) { + PageState& state = cached_pages[page]; + + // Apply the change to the page state + const u8 new_count = state.AddDelta(); + + // If the protection changed flush pending (un)protect action + if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] { + release_pending(); + perms = new_perms; + } + + // If the page must be (un)protected, add it to the pending range + if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) { + if (range_bytes == 0) { + range_begin = page; + } + range_bytes += PAGE_SIZE; + } else { + release_pending(); + } + } + + // Flush pending (un)protect action + release_pending(); + } + + std::array cached_pages{}; +#ifdef __linux__ + Common::AdaptiveMutex lock; +#else + Common::SpinLock lock; +#endif +}; PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) - : impl{std::make_unique(rasterizer_)}, rasterizer{rasterizer_} {} + : impl{std::make_unique(rasterizer_)} {} PageManager::~PageManager() = default; -VAddr PageManager::GetPageAddr(VAddr addr) { - return Common::AlignDown(addr, PAGESIZE); -} - -VAddr PageManager::GetNextPageAddr(VAddr addr) { - return Common::AlignUp(addr + 1, PAGESIZE); -} - void PageManager::OnGpuMap(VAddr address, size_t size) { impl->OnMap(address, size); } @@ -182,41 +248,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) { impl->OnUnmap(address, size); } -void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { - static constexpr u64 PageShift = 12; - - std::scoped_lock lk{lock}; - const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; - const u64 page_start = addr >> PageShift; - const u64 page_end = page_start + num_pages; - - const auto pages_interval = - decltype(cached_pages)::interval_type::right_open(page_start, page_end); - if (delta > 0) { - cached_pages.add({pages_interval, delta}); - } - - const auto& range = cached_pages.equal_range(pages_interval); - for (const auto& [range, count] : boost::make_iterator_range(range)) { - const auto interval = range & pages_interval; - const VAddr interval_start_addr = boost::icl::first(interval) << PageShift; - const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; - const u32 interval_size = interval_end_addr - interval_start_addr; - ASSERT_MSG(rasterizer->IsMapped(interval_start_addr, interval_size), - "Attempted to track non-GPU memory at address {:#x}, size {:#x}.", - interval_start_addr, interval_size); - if (delta > 0 && count == delta) { - impl->Protect(interval_start_addr, interval_size, false); - } else if (delta < 0 && count == -delta) { - impl->Protect(interval_start_addr, interval_size, true); - } else { - ASSERT(count >= 0); - } - } - - if (delta < 0) { - cached_pages.add({pages_interval, delta}); - } +template +void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const { + impl->UpdatePageWatchers(addr, size); } +template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const; + } // namespace VideoCore diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index f6bae9641..62a0fde90 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -4,11 +4,7 @@ #pragma once #include -#include -#ifdef __linux__ -#include "common/adaptive_mutex.h" -#endif -#include "common/spin_lock.h" +#include "common/alignment.h" #include "common/types.h" namespace Vulkan { @@ -18,6 +14,8 @@ class Rasterizer; namespace VideoCore { class PageManager { + static constexpr size_t PAGE_BITS = 12; + static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS; public: explicit PageManager(Vulkan::Rasterizer* rasterizer); ~PageManager(); @@ -28,22 +26,23 @@ public: /// Unregister a range of gpu memory that was unmapped. void OnGpuUnmap(VAddr address, size_t size); - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta); + /// Updates watches in the pages touching the specified region. + template + void UpdatePageWatchers(VAddr addr, u64 size) const; - static VAddr GetPageAddr(VAddr addr); - static VAddr GetNextPageAddr(VAddr addr); + /// Returns page aligned address. + static constexpr VAddr GetPageAddr(VAddr addr) { + return Common::AlignDown(addr, PAGE_SIZE); + } + + /// Returns address of the next page. + static constexpr VAddr GetNextPageAddr(VAddr addr) { + return Common::AlignUp(addr + 1, PAGE_SIZE); + } private: struct Impl; std::unique_ptr impl; - Vulkan::Rasterizer* rasterizer; - boost::icl::interval_map cached_pages; -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP - Common::AdaptiveMutex lock; -#else - Common::SpinLock lock; -#endif }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 82f4d6413..409085511 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -672,7 +672,7 @@ void TextureCache::TrackImage(ImageId image_id) { // Re-track the whole image image.track_addr = image_begin; image.track_addr_end = image_end; - tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1); + tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size); } else { if (image_begin < image.track_addr) { TrackImageHead(image_id); @@ -695,7 +695,7 @@ void TextureCache::TrackImageHead(ImageId image_id) { ASSERT(image.track_addr != 0 && image_begin < image.track_addr); const auto size = image.track_addr - image_begin; image.track_addr = image_begin; - tracker.UpdatePagesCachedCount(image_begin, size, 1); + tracker.UpdatePageWatchers<1>(image_begin, size); } void TextureCache::TrackImageTail(ImageId image_id) { @@ -711,7 +711,7 @@ void TextureCache::TrackImageTail(ImageId image_id) { const auto addr = image.track_addr_end; const auto size = image_end - image.track_addr_end; image.track_addr_end = image_end; - tracker.UpdatePagesCachedCount(addr, size, 1); + tracker.UpdatePageWatchers<1>(addr, size); } void TextureCache::UntrackImage(ImageId image_id) { @@ -724,7 +724,7 @@ void TextureCache::UntrackImage(ImageId image_id) { image.track_addr = 0; image.track_addr_end = 0; if (size != 0) { - tracker.UpdatePagesCachedCount(addr, size, -1); + tracker.UpdatePageWatchers<-1>(addr, size); } } @@ -743,7 +743,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePagesCachedCount(image_begin, size, -1); + tracker.UpdatePageWatchers<-1>(image_begin, size); } void TextureCache::UntrackImageTail(ImageId image_id) { @@ -762,7 +762,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePagesCachedCount(addr, size, -1); + tracker.UpdatePageWatchers<-1>(addr, size); } void TextureCache::DeleteImage(ImageId image_id) {