From cfad5c9596b1620925791cc19c94c9b195161a2a Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 8 Jun 2025 01:09:09 +0200 Subject: [PATCH] Batched protect progress --- CMakeLists.txt | 5 +- src/common/bit_array.h | 70 +++++++++++++++++++ src/video_core/buffer_cache/buffer_cache.h | 2 +- ...memory_tracker_base.h => memory_tracker.h} | 2 +- .../buffer_cache/region_definitions.h | 31 ++++++++ .../{word_manager.h => region_manager.h} | 24 +------ src/video_core/page_manager.cpp | 30 ++++++-- src/video_core/page_manager.h | 8 ++- .../texture_cache/texture_cache.cpp | 6 +- 9 files changed, 142 insertions(+), 36 deletions(-) rename src/video_core/buffer_cache/{memory_tracker_base.h => memory_tracker.h} (99%) create mode 100644 src/video_core/buffer_cache/region_definitions.h rename src/video_core/buffer_cache/{word_manager.h => region_manager.h} (93%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 44e3c9852..d8fe5f68b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -914,9 +914,10 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.h - src/video_core/buffer_cache/memory_tracker_base.h + src/video_core/buffer_cache/memory_tracker.h src/video_core/buffer_cache/range_set.h - src/video_core/buffer_cache/word_manager.h + src/video_core/buffer_cache/region_definitions.h + src/video_core/buffer_cache/region_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h src/video_core/renderer_vulkan/vk_common.cpp diff --git a/src/common/bit_array.h b/src/common/bit_array.h index e754cf589..e0322cab0 100644 --- a/src/common/bit_array.h +++ b/src/common/bit_array.h @@ -223,6 +223,76 @@ public: return {N, N}; } + inline constexpr Range FirstRange() const { + return FirstRangeFrom(0); + } + + Range LastRegionFrom(size_t end) const { + if (end >= N) { + return {N, N}; + } + if (end == 0) { + return {0, 0}; + } + const auto find_start_bit = [&](size_t word) { +#ifdef BIT_ARRAY_USE_AVX + const __m256i all_zero = _mm256_setzero_si256(); + for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) { + const __m256i current = + _mm256_loadu_si256(reinterpret_cast(&data[word - WORDS_PER_AVX])); + const __m256i cmp = _mm256_cmpeq_epi64(current, all_zero); + if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { + break; + } + } +#endif + for (; word > 0; --word) { + if (data[word - 1] != 0) { + return word * BITS_PER_WORD - std::countl_zero(data[word - 1]); + } + } + return size_t(0); + }; + const auto word_bits = [&](size_t index, u64 word) { + const int empty_bits = std::countl_zero(word); + const int ones_count = std::countl_one(word << empty_bits); + const size_t end_bit = index * BITS_PER_WORD - empty_bits; + if (empty_bits + ones_count < BITS_PER_WORD) { + return Range{end_bit - ones_count, end_bit}; + } + return Range{find_start_bit(index - 1), end_bit}; + }; + const size_t end_word = (end - 1) / BITS_PER_WORD; + const size_t end_bit = (end - 1) % BITS_PER_WORD; + u64 masked_last = data[end_word]; + if (end_bit < BITS_PER_WORD - 1) { + masked_last &= (1ULL << (end_bit + 1)) - 1; + } + if (masked_last) { + return word_bits(end_word, masked_last); + } + size_t word = end_word - 1; +#ifdef BIT_ARRAY_USE_AVX + for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) { + const __m256i current = + _mm256_loadu_si256(reinterpret_cast(&data[word - WORDS_PER_AVX])); + if (!_mm256_testz_si256(current, current)) { + break; + } + } +#endif + for (; word > 0; --word) { + if (data[word - 1] != 0) { + return word_bits(word, data[word - 1]); + } + } + return {0, 0}; + } + + inline constexpr Range LastRegion() const { + return LastRegionFrom(0); + } + inline constexpr size_t Size() const { return N; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d7d753213..651ba84dc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -9,7 +9,7 @@ #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/buffer_cache/memory_tracker.h" #include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker.h similarity index 99% rename from src/video_core/buffer_cache/memory_tracker_base.h rename to src/video_core/buffer_cache/memory_tracker.h index c60aa9c80..37fafa2d6 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -9,7 +9,7 @@ #include #include "common/debug.h" #include "common/types.h" -#include "video_core/buffer_cache/word_manager.h" +#include "video_core/buffer_cache/region_manager.h" namespace VideoCore { diff --git a/src/video_core/buffer_cache/region_definitions.h b/src/video_core/buffer_cache/region_definitions.h new file mode 100644 index 000000000..bb5e40c40 --- /dev/null +++ b/src/video_core/buffer_cache/region_definitions.h @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/bit_array.h" +#include "common/types.h" + +namespace VideoCore { + +constexpr u64 PAGES_PER_WORD = 64; +constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; + +constexpr u64 HIGHER_PAGE_BITS = 22; +constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; +constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + +enum class Type { + CPU, + GPU, + Untracked, +}; + +using WordsArray = std::array; +// TODO: use this insteed of WordsArray once it is ready +using RegionBits = Common::BitArray; + +} // namespace VideoCore \ No newline at end of file diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/region_manager.h similarity index 93% rename from src/video_core/buffer_cache/word_manager.h rename to src/video_core/buffer_cache/region_manager.h index 51a912c62..e22dc6cc5 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include #include @@ -16,26 +15,10 @@ #include "common/debug.h" #include "common/types.h" #include "video_core/page_manager.h" +#include "video_core/buffer_cache/region_definitions.h" namespace VideoCore { -constexpr u64 PAGES_PER_WORD = 64; -constexpr u64 BYTES_PER_PAGE = 4_KB; -constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; - -constexpr u64 HIGHER_PAGE_BITS = 22; -constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; -constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; -constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; - -enum class Type { - CPU, - GPU, - Untracked, -}; - -using WordsArray = std::array; - /** * Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region. * Information is stored in bitsets for spacial locality and fast update of single pages. @@ -250,12 +233,11 @@ private: template void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const { RENDERER_TRACE; - constexpr s32 delta = add_to_tracker ? 1 : -1; u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { - tracker->UpdatePageWatchers(addr + offset * BYTES_PER_PAGE, - size * BYTES_PER_PAGE); + tracker->UpdatePageWatchers(addr + offset * BYTES_PER_PAGE, + size * BYTES_PER_PAGE); }); } diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 39c03e7da..0cee1ee9b 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -190,7 +190,7 @@ struct PageManager::Impl { } #endif - template + template void UpdatePageWatchers(VAddr addr, u64 size) { RENDERER_TRACE; boost::container::small_vector update_ranges; @@ -223,7 +223,7 @@ struct PageManager::Impl { PageState& state = cached_pages[page]; // Apply the change to the page state - const u8 new_count = state.AddDelta(); + const u8 new_count = state.AddDelta(); // If the protection changed add pending (un)protect action if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] { @@ -232,7 +232,7 @@ struct PageManager::Impl { } // If the page must be (un)protected, add it to the pending range - if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) { + if ((new_count == 0 && !track) || (new_count == 1 && track)) { if (range_bytes == 0) { range_begin = page; } @@ -252,6 +252,22 @@ struct PageManager::Impl { } } + template + void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) { + RENDERER_TRACE; + boost::container::small_vector update_ranges; + { + std::scoped_lock lk(lock); + + + } + + // Flush deferred protects + for (const auto& range : update_ranges) { + Protect(range.addr, range.size, range.perms); + } + } + std::array cached_pages{}; #ifdef __linux__ Common::AdaptiveMutex lock; @@ -273,12 +289,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) { impl->OnUnmap(address, size); } -template +template void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const { - impl->UpdatePageWatchers(addr, size); + impl->UpdatePageWatchers(addr, size); } -template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const; -template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const; } // namespace VideoCore diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 98dd099af..59071dabc 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -6,6 +6,7 @@ #include #include "common/alignment.h" #include "common/types.h" +#include "video_core/buffer_cache//region_definitions.h" namespace Vulkan { class Rasterizer; @@ -28,9 +29,14 @@ public: void OnGpuUnmap(VAddr address, size_t size); /// Updates watches in the pages touching the specified region. - template + template void UpdatePageWatchers(VAddr addr, u64 size) const; + /// Updates watches in the pages touching the specified region + /// using a mask. + template + void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) const; + /// Returns page aligned address. static constexpr VAddr GetPageAddr(VAddr addr) { return Common::AlignDown(addr, PAGE_SIZE); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index a1ff5db8a..a50601af6 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -761,7 +761,7 @@ void TextureCache::UntrackImage(ImageId image_id) { image.track_addr = 0; image.track_addr_end = 0; if (size != 0) { - tracker.UpdatePageWatchers<-1>(addr, size); + tracker.UpdatePageWatchers(addr, size); } } @@ -780,7 +780,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers<-1>(image_begin, size); + tracker.UpdatePageWatchers(image_begin, size); } void TextureCache::UntrackImageTail(ImageId image_id) { @@ -799,7 +799,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers<-1>(addr, size); + tracker.UpdatePageWatchers(addr, size); } void TextureCache::DeleteImage(ImageId image_id) {