diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index 37fafa2d6..3dbffdabd 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -16,7 +16,7 @@ namespace VideoCore { class MemoryTracker { public: static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - TRACKER_HIGHER_PAGE_BITS); static constexpr size_t MANAGER_POOL_SIZE = 32; public: @@ -90,11 +90,11 @@ private: using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; std::size_t remaining_size{size}; - std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; - u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK}; while (remaining_size > 0) { const std::size_t copy_amount{ - std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + std::min(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)}; auto* manager{top_tier[page_index]}; if (manager) { if constexpr (BOOL_BREAK) { @@ -123,7 +123,7 @@ private: } void CreateRegion(std::size_t page_index) { - const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; + const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS; if (free_managers.empty()) { manager_pool.emplace_back(); auto& last_pool = manager_pool.back(); diff --git a/src/video_core/buffer_cache/region_definitions.h b/src/video_core/buffer_cache/region_definitions.h index 80c6afdc6..f035704d9 100644 --- a/src/video_core/buffer_cache/region_definitions.h +++ b/src/video_core/buffer_cache/region_definitions.h @@ -9,13 +9,13 @@ namespace VideoCore { -constexpr u64 PAGES_PER_WORD = 64; -constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 TRACKER_PAGE_BITS = 12; // 4K 
pages +constexpr u64 TRACKER_BYTES_PER_PAGE = 1ULL << TRACKER_PAGE_BITS; -constexpr u64 HIGHER_PAGE_BITS = 22; -constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; -constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; -constexpr u64 NUM_REGION_PAGES = HIGHER_PAGE_SIZE / BYTES_PER_PAGE; +constexpr u64 TRACKER_HIGHER_PAGE_BITS = 24; // each region is 16MB +constexpr u64 TRACKER_HIGHER_PAGE_SIZE = 1ULL << TRACKER_HIGHER_PAGE_BITS; +constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE; enum class Type { CPU, @@ -23,6 +23,6 @@ enum class Type { Writeable, }; -using RegionBits = Common::BitArray; +using RegionBits = Common::BitArray; } // namespace VideoCore \ No newline at end of file diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index 07ffee36b..19d0d700e 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -83,9 +83,9 @@ public: void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { RENDERER_TRACE; const size_t offset = dirty_addr - cpu_addr; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } std::scoped_lock lk{lock}; @@ -114,9 +114,9 @@ public: void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { RENDERER_TRACE; const size_t offset = query_cpu_range - cpu_addr; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = 
Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } std::scoped_lock lk{lock}; @@ -131,7 +131,7 @@ public: } for (const auto& [start, end] : mask) { - func(cpu_addr + start * BYTES_PER_PAGE, (end - start) * BYTES_PER_PAGE); + func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE); } if constexpr (clear) { @@ -151,9 +151,9 @@ public: template [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { RENDERER_TRACE; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return false; } // std::scoped_lock lk{lock}; // Is this needed? 
diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 145779070..6495b8a69 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -59,6 +59,7 @@ struct PageManager::Impl { static constexpr size_t ADDRESS_BITS = 40; static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS); + static constexpr size_t NUM_ADDRESS_LOCKS = NUM_ADDRESS_PAGES / PAGES_PER_LOCK; inline static Vulkan::Rasterizer* rasterizer; #ifdef ENABLE_USERFAULTFD Impl(Vulkan::Rasterizer* rasterizer_) { @@ -189,11 +190,20 @@ struct PageManager::Impl { template void UpdatePageWatchers(VAddr addr, u64 size) { RENDERER_TRACE; - + size_t page = addr >> PAGE_BITS; + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + + const size_t lock_start = page / PAGES_PER_LOCK; + const size_t lock_end = Common::DivCeil(page_end, PAGES_PER_LOCK); + for (size_t i = lock_start; i < lock_end; ++i) { + locks[i].lock(); + } + auto perms = cached_pages[page].Perm(); u64 range_begin = 0; u64 range_bytes = 0; + u64 potential_range_bytes = 0; const auto release_pending = [&] { if (range_bytes > 0) { @@ -201,13 +211,12 @@ struct PageManager::Impl { // Perform pending (un)protect action Protect(range_begin << PAGE_BITS, range_bytes, perms); range_bytes = 0; + potential_range_bytes = 0; } }; - std::scoped_lock lk(lock); // Iterate requested pages - const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); const u64 aligned_addr = page << PAGE_BITS; const u64 aligned_end = page_end << PAGE_BITS; ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr), @@ -225,19 +234,29 @@ struct PageManager::Impl { release_pending(); perms = new_perms; } else if (range_bytes != 0) { - // If the protection did not change, extend the current range - range_bytes += PAGE_SIZE; + // If the protection did not change, extend the potential range + potential_range_bytes += PAGE_SIZE; } // Only start a new range if the page must be (un)protected - if 
(range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) { - range_begin = page; - range_bytes = PAGE_SIZE; + if ((new_count == 0 && !track) || (new_count == 1 && track)) { + if (range_bytes == 0) { + // Start a new potential range + range_begin = page; + potential_range_bytes = PAGE_SIZE; + } + // Extend current range up to potential range + range_bytes = potential_range_bytes; } } // Add pending (un)protect action release_pending(); + + // Unlock all locks + for (size_t i = lock_start; i < lock_end; ++i) { + locks[i].unlock(); + } } template @@ -254,11 +273,14 @@ struct PageManager::Impl { UpdatePageWatchers(start_addr, size); return; } - + size_t base_page = (base_addr >> PAGE_BITS); + ASSERT(base_page % PAGES_PER_LOCK == 0); + std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]); auto perms = cached_pages[base_page + start_range.first].Perm(); u64 range_begin = 0; u64 range_bytes = 0; + u64 potential_range_bytes = 0; const auto release_pending = [&] { if (range_bytes > 0) { @@ -266,11 +288,10 @@ struct PageManager::Impl { // Perform pending (un)protect action Protect((range_begin << PAGE_BITS), range_bytes, perms); range_bytes = 0; + potential_range_bytes = 0; } }; - std::scoped_lock lk(lock); - // Iterate pages for (size_t page = start_range.first; page < end_range.second; ++page) { PageState& state = cached_pages[base_page + page]; @@ -284,8 +305,8 @@ struct PageManager::Impl { release_pending(); perms = new_perms; } else if (range_bytes != 0) { - // If the protection did not change, extend the current range - range_bytes += PAGE_SIZE; + // If the protection did not change, extend the potential range + potential_range_bytes += PAGE_SIZE; } // If the page is not being updated, skip it @@ -293,10 +314,15 @@ struct PageManager::Impl { continue; } - // Only start a new range if the page must be (un)protected - if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) { - range_begin = base_page + page; - 
range_bytes = PAGE_SIZE; + // If the page must be (un)protected + if ((new_count == 0 && !track) || (new_count == 1 && track)) { + if (range_bytes == 0) { + // Start a new potential range + range_begin = base_page + page; + potential_range_bytes = PAGE_SIZE; + } + // Extend current range up to potential range + range_bytes = potential_range_bytes; } } @@ -306,10 +332,11 @@ struct PageManager::Impl { std::array cached_pages{}; #ifdef __linux__ - Common::AdaptiveMutex lock; + using LockType = Common::AdaptiveMutex; #else - Common::SpinLock lock; + using LockType = Common::SpinLock; #endif + std::array locks{}; }; PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 157b34984..561087ead 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -15,8 +15,13 @@ class Rasterizer; namespace VideoCore { class PageManager { - static constexpr size_t PAGE_BITS = 12; - static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS; + // Use the same page size as the tracker. + static constexpr size_t PAGE_BITS = TRACKER_PAGE_BITS; + static constexpr size_t PAGE_SIZE = TRACKER_BYTES_PER_PAGE; + + // Keep the lock granularity the same as region granularity. (since each region has + // its own lock) + static constexpr size_t PAGES_PER_LOCK = NUM_PAGES_PER_REGION; public: explicit PageManager(Vulkan::Rasterizer* rasterizer);