Batched protect progress

This commit is contained in:
Lander Gallastegi 2025-06-08 01:09:09 +02:00
parent 0a11c48693
commit cfad5c9596
9 changed files with 142 additions and 36 deletions

View File

@ -914,9 +914,10 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/buffer_cache/buffer.h
src/video_core/buffer_cache/buffer_cache.cpp
src/video_core/buffer_cache/buffer_cache.h
src/video_core/buffer_cache/memory_tracker_base.h
src/video_core/buffer_cache/memory_tracker.h
src/video_core/buffer_cache/range_set.h
src/video_core/buffer_cache/word_manager.h
src/video_core/buffer_cache/region_definitions.h
src/video_core/buffer_cache/region_manager.h
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
src/video_core/renderer_vulkan/liverpool_to_vk.h
src/video_core/renderer_vulkan/vk_common.cpp

View File

@ -223,6 +223,76 @@ public:
return {N, N};
}
inline constexpr Range FirstRange() const {
return FirstRangeFrom(0);
}
Range LastRegionFrom(size_t end) const {
if (end >= N) {
return {N, N};
}
if (end == 0) {
return {0, 0};
}
const auto find_start_bit = [&](size_t word) {
#ifdef BIT_ARRAY_USE_AVX
const __m256i all_zero = _mm256_setzero_si256();
for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) {
const __m256i current =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(&data[word - WORDS_PER_AVX]));
const __m256i cmp = _mm256_cmpeq_epi64(current, all_zero);
if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) {
break;
}
}
#endif
for (; word > 0; --word) {
if (data[word - 1] != 0) {
return word * BITS_PER_WORD - std::countl_zero(data[word - 1]);
}
}
return size_t(0);
};
const auto word_bits = [&](size_t index, u64 word) {
const int empty_bits = std::countl_zero(word);
const int ones_count = std::countl_one(word << empty_bits);
const size_t end_bit = index * BITS_PER_WORD - empty_bits;
if (empty_bits + ones_count < BITS_PER_WORD) {
return Range{end_bit - ones_count, end_bit};
}
return Range{find_start_bit(index - 1), end_bit};
};
const size_t end_word = (end - 1) / BITS_PER_WORD;
const size_t end_bit = (end - 1) % BITS_PER_WORD;
u64 masked_last = data[end_word];
if (end_bit < BITS_PER_WORD - 1) {
masked_last &= (1ULL << (end_bit + 1)) - 1;
}
if (masked_last) {
return word_bits(end_word, masked_last);
}
size_t word = end_word - 1;
#ifdef BIT_ARRAY_USE_AVX
for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) {
const __m256i current =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(&data[word - WORDS_PER_AVX]));
if (!_mm256_testz_si256(current, current)) {
break;
}
}
#endif
for (; word > 0; --word) {
if (data[word - 1] != 0) {
return word_bits(word, data[word - 1]);
}
}
return {0, 0};
}
inline constexpr Range LastRegion() const {
return LastRegionFrom(0);
}
inline constexpr size_t Size() const {
return N;
}

View File

@ -9,7 +9,7 @@
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/memory_tracker.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"

View File

@ -9,7 +9,7 @@
#include <vector>
#include "common/debug.h"
#include "common/types.h"
#include "video_core/buffer_cache/word_manager.h"
#include "video_core/buffer_cache/region_manager.h"
namespace VideoCore {

View File

@ -0,0 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "common/bit_array.h"
#include "common/types.h"
namespace VideoCore {
constexpr u64 PAGES_PER_WORD = 64;
constexpr u64 BYTES_PER_PAGE = 4_KB;
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
constexpr u64 HIGHER_PAGE_BITS = 22;
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
enum class Type {
CPU,
GPU,
Untracked,
};
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
// TODO: use this insteed of WordsArray once it is ready
using RegionBits = Common::BitArray<NUM_REGION_WORDS * PAGES_PER_WORD>;
} // namespace VideoCore

View File

@ -3,7 +3,6 @@
#pragma once
#include <array>
#include <mutex>
#include <span>
#include <utility>
@ -16,26 +15,10 @@
#include "common/debug.h"
#include "common/types.h"
#include "video_core/page_manager.h"
#include "video_core/buffer_cache/region_definitions.h"
namespace VideoCore {
constexpr u64 PAGES_PER_WORD = 64;
constexpr u64 BYTES_PER_PAGE = 4_KB;
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
constexpr u64 HIGHER_PAGE_BITS = 22;
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
enum class Type {
CPU,
GPU,
Untracked,
};
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
/**
* Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region.
* Information is stored in bitsets for spacial locality and fast update of single pages.
@ -250,11 +233,10 @@ private:
template <bool add_to_tracker>
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
RENDERER_TRACE;
constexpr s32 delta = add_to_tracker ? 1 : -1;
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) {
tracker->UpdatePageWatchers<delta>(addr + offset * BYTES_PER_PAGE,
tracker->UpdatePageWatchers<add_to_tracker>(addr + offset * BYTES_PER_PAGE,
size * BYTES_PER_PAGE);
});
}

View File

@ -190,7 +190,7 @@ struct PageManager::Impl {
}
#endif
template <s32 delta>
template <bool track>
void UpdatePageWatchers(VAddr addr, u64 size) {
RENDERER_TRACE;
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
@ -223,7 +223,7 @@ struct PageManager::Impl {
PageState& state = cached_pages[page];
// Apply the change to the page state
const u8 new_count = state.AddDelta<delta>();
const u8 new_count = state.AddDelta<track ? 1 : -1>();
// If the protection changed add pending (un)protect action
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
@ -232,7 +232,7 @@ struct PageManager::Impl {
}
// If the page must be (un)protected, add it to the pending range
if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
if (range_bytes == 0) {
range_begin = page;
}
@ -252,6 +252,22 @@ struct PageManager::Impl {
}
}
template <bool track>
void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) {
RENDERER_TRACE;
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
{
std::scoped_lock lk(lock);
}
// Flush deferred protects
for (const auto& range : update_ranges) {
Protect(range.addr, range.size, range.perms);
}
}
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
#ifdef __linux__
Common::AdaptiveMutex lock;
@ -273,12 +289,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
impl->OnUnmap(address, size);
}
template <s32 delta>
template <bool track>
void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
impl->UpdatePageWatchers<delta>(addr, size);
impl->UpdatePageWatchers<track>(addr, size);
}
template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const;
template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const;
template void PageManager::UpdatePageWatchers<true>(VAddr addr, u64 size) const;
template void PageManager::UpdatePageWatchers<false>(VAddr addr, u64 size) const;
} // namespace VideoCore

View File

@ -6,6 +6,7 @@
#include <memory>
#include "common/alignment.h"
#include "common/types.h"
#include "video_core/buffer_cache//region_definitions.h"
namespace Vulkan {
class Rasterizer;
@ -28,9 +29,14 @@ public:
void OnGpuUnmap(VAddr address, size_t size);
/// Updates watches in the pages touching the specified region.
template <s32 delta>
template <bool track>
void UpdatePageWatchers(VAddr addr, u64 size) const;
/// Updates watches in the pages touching the specified region
/// using a mask.
template <bool track>
void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) const;
/// Returns page aligned address.
static constexpr VAddr GetPageAddr(VAddr addr) {
return Common::AlignDown(addr, PAGE_SIZE);

View File

@ -761,7 +761,7 @@ void TextureCache::UntrackImage(ImageId image_id) {
image.track_addr = 0;
image.track_addr_end = 0;
if (size != 0) {
tracker.UpdatePageWatchers<-1>(addr, size);
tracker.UpdatePageWatchers<false>(addr, size);
}
}
@ -780,7 +780,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
// Cehck its hash later.
MarkAsMaybeDirty(image_id, image);
}
tracker.UpdatePageWatchers<-1>(image_begin, size);
tracker.UpdatePageWatchers<false>(image_begin, size);
}
void TextureCache::UntrackImageTail(ImageId image_id) {
@ -799,7 +799,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) {
// Cehck its hash later.
MarkAsMaybeDirty(image_id, image);
}
tracker.UpdatePageWatchers<-1>(addr, size);
tracker.UpdatePageWatchers<false>(addr, size);
}
void TextureCache::DeleteImage(ImageId image_id) {