Port page_manager from readbacks-poc

This commit is contained in:
Lander Gallastegi 2025-05-16 00:56:08 +02:00
parent f155ec1663
commit 2f05b23cca
6 changed files with 140 additions and 111 deletions

View File

@ -36,7 +36,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
0, AllFlags, BDA_PAGETABLE_SIZE},
fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE),
memory_tracker{&tracker} {
memory_tracker{tracker} {
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
"BDA Page Table Buffer");

View File

@ -19,11 +19,11 @@ public:
static constexpr size_t MANAGER_POOL_SIZE = 32;
public:
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
explicit MemoryTracker(PageManager& tracker_) : tracker{&tracker_} {}
~MemoryTracker() = default;
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true>(
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::CPU>(offset, size);
@ -31,7 +31,7 @@ public:
}
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false>(
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::GPU>(offset, size);
@ -57,8 +57,7 @@ public:
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
IteratePages<true>(query_cpu_range, query_size,
[&func](RegionManager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>(
@ -67,17 +66,12 @@ public:
}
/// Call 'func' for each GPU modified range and, when 'clear' is true, unmark those pages as GPU modified
template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
template <bool clear>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
IteratePages<false>(query_cpu_range, query_size,
[&func](RegionManager* manager, u64 offset, size_t size) {
if constexpr (clear) {
manager->template ForEachModifiedRange<Type::GPU, true>(
manager->GetCpuAddr() + offset, size, func);
} else {
manager->template ForEachModifiedRange<Type::GPU, false>(
manager->GetCpuAddr() + offset, size, func);
}
manager->template ForEachModifiedRange<Type::GPU, clear>(
manager->GetCpuAddr() + offset, size, func);
});
}

View File

@ -10,8 +10,9 @@
#ifdef __linux__
#include "common/adaptive_mutex.h"
#endif
#else
#include "common/spin_lock.h"
#endif
#include "common/types.h"
#include "video_core/page_manager.h"
@ -56,7 +57,7 @@ public:
return cpu_addr;
}
static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
static constexpr u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
constexpr size_t number_bits = sizeof(u64) * 8;
const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
u64 bits = (word >> page_start) << page_start;
@ -64,7 +65,7 @@ public:
return bits;
}
static std::pair<size_t, size_t> GetWordPage(VAddr address) {
static constexpr std::pair<size_t, size_t> GetWordPage(VAddr address) {
const size_t converted_address = static_cast<size_t>(address);
const size_t word_number = converted_address / BYTES_PER_WORD;
const size_t amount_pages = converted_address % BYTES_PER_WORD;
@ -104,13 +105,12 @@ public:
}
}
template <typename Func>
void IteratePages(u64 mask, Func&& func) const {
void IteratePages(u64 mask, auto&& func) const {
size_t offset = 0;
while (mask != 0) {
const size_t empty_bits = std::countr_zero(mask);
offset += empty_bits;
mask = mask >> empty_bits;
mask >>= empty_bits;
const size_t continuous_bits = std::countr_one(mask);
func(offset, continuous_bits);
@ -155,8 +155,8 @@ public:
* @param size Size in bytes of the CPU range to loop over
* @param func Function to call for each turned off region
*/
template <Type type, bool clear, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
template <Type type, bool clear>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
std::scoped_lock lk{lock};
static_assert(type != Type::Untracked);
@ -177,11 +177,9 @@ public:
if constexpr (clear) {
if constexpr (type == Type::CPU) {
UpdateProtection<true>(index, untracked[index], mask);
}
state_words[index] &= ~mask;
if constexpr (type == Type::CPU) {
untracked[index] &= ~mask;
}
state_words[index] &= ~mask;
}
const size_t base_offset = index * PAGES_PER_WORD;
IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
@ -245,11 +243,12 @@ private:
*/
template <bool add_to_tracker>
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
constexpr s32 delta = add_to_tracker ? 1 : -1;
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) {
tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
add_to_tracker ? 1 : -1);
tracker->UpdatePageWatchers<delta>(addr + offset * BYTES_PER_PAGE,
size * BYTES_PER_PAGE);
});
}

View File

@ -1,11 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include <boost/icl/interval_set.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/signal_context.h"
#include "core/memory.h"
#include "core/signals.h"
@ -15,8 +12,10 @@
#ifndef _WIN64
#include <sys/mman.h>
#ifdef ENABLE_USERFAULTFD
#include <thread>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include "common/error.h"
#include <poll.h>
#include <sys/ioctl.h>
#endif
@ -24,14 +23,43 @@
#include <windows.h>
#endif
#ifdef __linux__
#include "common/adaptive_mutex.h"
#else
#include "common/spin_lock.h"
#endif
namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12;
constexpr size_t PAGE_SIZE = 4_KB;
constexpr size_t PAGE_BITS = 12;
#ifdef ENABLE_USERFAULTFD
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
/// Tracking state of a single page: the number of watchers currently registered on it.
struct PageState {
    /// Largest value the 8-bit watcher counter can represent.
    static constexpr u8 MAX_WATCHERS = 0xFF;

    /// Number of watchers registered on this page.
    u8 num_watchers{};

    /// Returns the permission that corresponds to the current watcher count:
    /// ReadWrite while the page has no watchers, Read otherwise.
    Core::MemoryPermission Perm() const noexcept {
        return num_watchers == 0 ? Core::MemoryPermission::ReadWrite
                                 : Core::MemoryPermission::Read;
    }

    /// Adds one watcher when delta == 1; any other delta removes one watcher.
    /// @return The watcher count after the change.
    template <s32 delta>
    u8 AddDelta() {
        if constexpr (delta == 1) {
            // Unsigned arithmetic wraps silently: without this check the 256th watcher
            // would reset the counter to 0 and Perm() would report ReadWrite for a page
            // that is still being watched.
            ASSERT_MSG(num_watchers < MAX_WATCHERS, "Too many watchers");
            return ++num_watchers;
        } else {
            ASSERT_MSG(num_watchers > 0, "Not enough watchers");
            return --num_watchers;
        }
    }
};
static constexpr size_t ADDRESS_BITS = 40;
static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS);
inline static Vulkan::Rasterizer* rasterizer;
#ifdef ENABLE_USERFAULTFD
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg());
@ -63,7 +91,8 @@ struct PageManager::Impl {
ASSERT_MSG(ret != -1, "Uffdio unregister failed");
}
void Protect(VAddr address, size_t size, bool allow_write) {
void Protect(VAddr address, size_t size, Core::MemoryPermission perms) {
bool allow_write = True(perms & Core::MemoryPermission::Write);
uffdio_writeprotect wp;
wp.range.start = address;
wp.range.len = size;
@ -118,12 +147,9 @@ struct PageManager::Impl {
}
}
Vulkan::Rasterizer* rasterizer;
std::jthread ufd_thread;
int uffd;
};
#else
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
@ -141,12 +167,10 @@ struct PageManager::Impl {
// No-op
}
void Protect(VAddr address, size_t size, bool allow_write) {
void Protect(VAddr address, size_t size, Core::MemoryPermission perms) {
auto* memory = Core::Memory::Instance();
auto& impl = memory->GetAddressSpace();
impl.Protect(address, size,
allow_write ? Core::MemoryPermission::ReadWrite
: Core::MemoryPermission::Read);
impl.Protect(address, size, perms);
}
static bool GuestFaultSignalHandler(void* context, void* fault_address) {
@ -157,23 +181,65 @@ struct PageManager::Impl {
return false;
}
inline static Vulkan::Rasterizer* rasterizer;
};
#endif
/// Applies PageState::AddDelta<delta> to every page touched by [addr, addr + size) and
/// calls Protect() for the pages whose watcher count changed between 0 and 1.
///
/// Consecutive pages that need the same protection change are accumulated into one
/// pending range so that Protect() is called once per run instead of once per page.
/// @param addr Start address of the region; the first page is addr >> PAGE_BITS.
/// @param size Size of the region in bytes; the end page is derived from addr + size.
// NOTE(review): cached_pages is indexed without a range check; this assumes addr + size
// stays within the NUM_ADDRESS_PAGES pages covered by the array - confirm against callers.
// NOTE(review): with size == 0 and an addr that is not page aligned, page_end presumably
// rounds up to page + 1, so one page would still be updated - confirm callers never do this.
template <s32 delta>
void UpdatePageWatchers(VAddr addr, u64 size) {
// The lock is held for the whole update, including the Protect() calls.
std::scoped_lock lk(lock);
size_t page = addr >> PAGE_BITS;
// Permission of the first page before any change is applied; used as the baseline
// against which permission changes are detected inside the loop.
auto perms = cached_pages[page].Perm();
// Pending run of pages awaiting a Protect() call: range_begin is a page index,
// range_bytes is the length of the run in bytes (0 means nothing is pending).
u64 range_begin = 0;
u64 range_bytes = 0;
// Issues the pending Protect() call, if any, with the currently tracked permission
// and resets the run.
const auto release_pending = [&] {
if (range_bytes > 0) {
Protect(range_begin << PAGE_BITS, range_bytes, perms);
range_bytes = 0;
}
};
// Iterate requested pages
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
for (; page != page_end; ++page) {
PageState& state = cached_pages[page];
// Apply the change to the page state
const u8 new_count = state.AddDelta<delta>();
// If the protection changed flush pending (un)protect action
// (the flush must happen before perms is overwritten so the pending run is
// submitted with the permission it was collected under)
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
release_pending();
perms = new_perms;
}
// If the page must be (un)protected, add it to the pending range
// (only the last watcher leaving or the first watcher arriving changes protection)
if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
if (range_bytes == 0) {
range_begin = page;
}
range_bytes += PAGE_SIZE;
} else {
// This page needs no protection change, so it ends the current run.
release_pending();
}
}
// Flush pending (un)protect action
release_pending();
}
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
#ifdef __linux__
Common::AdaptiveMutex lock;
#else
Common::SpinLock lock;
#endif
};
PageManager::PageManager(Vulkan::Rasterizer* rasterizer_)
: impl{std::make_unique<Impl>(rasterizer_)}, rasterizer{rasterizer_} {}
: impl{std::make_unique<Impl>(rasterizer_)} {}
PageManager::~PageManager() = default;
VAddr PageManager::GetPageAddr(VAddr addr) {
return Common::AlignDown(addr, PAGESIZE);
}
VAddr PageManager::GetNextPageAddr(VAddr addr) {
return Common::AlignUp(addr + 1, PAGESIZE);
}
void PageManager::OnGpuMap(VAddr address, size_t size) {
impl->OnMap(address, size);
}
@ -182,41 +248,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
impl->OnUnmap(address, size);
}
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
static constexpr u64 PageShift = 12;
std::scoped_lock lk{lock};
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
const u64 page_start = addr >> PageShift;
const u64 page_end = page_start + num_pages;
const auto pages_interval =
decltype(cached_pages)::interval_type::right_open(page_start, page_end);
if (delta > 0) {
cached_pages.add({pages_interval, delta});
}
const auto& range = cached_pages.equal_range(pages_interval);
for (const auto& [range, count] : boost::make_iterator_range(range)) {
const auto interval = range & pages_interval;
const VAddr interval_start_addr = boost::icl::first(interval) << PageShift;
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift;
const u32 interval_size = interval_end_addr - interval_start_addr;
ASSERT_MSG(rasterizer->IsMapped(interval_start_addr, interval_size),
"Attempted to track non-GPU memory at address {:#x}, size {:#x}.",
interval_start_addr, interval_size);
if (delta > 0 && count == delta) {
impl->Protect(interval_start_addr, interval_size, false);
} else if (delta < 0 && count == -delta) {
impl->Protect(interval_start_addr, interval_size, true);
} else {
ASSERT(count >= 0);
}
}
if (delta < 0) {
cached_pages.add({pages_interval, delta});
}
/// Forwards the watcher update for the region [addr, addr + size) to the Impl object.
/// @param addr Start address of the region.
/// @param size Size of the region in bytes.
template <s32 delta>
void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
impl->UpdatePageWatchers<delta>(addr, size);
}
// Explicit instantiations for the two deltas provided here: 1 and -1.
template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const;
template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const;
} // namespace VideoCore

View File

@ -4,11 +4,7 @@
#pragma once
#include <memory>
#include <boost/icl/interval_map.hpp>
#ifdef __linux__
#include "common/adaptive_mutex.h"
#endif
#include "common/spin_lock.h"
#include "common/alignment.h"
#include "common/types.h"
namespace Vulkan {
@ -18,6 +14,8 @@ class Rasterizer;
namespace VideoCore {
class PageManager {
static constexpr size_t PAGE_BITS = 12;
static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS;
public:
explicit PageManager(Vulkan::Rasterizer* rasterizer);
~PageManager();
@ -28,22 +26,23 @@ public:
/// Unregister a range of gpu memory that was unmapped.
void OnGpuUnmap(VAddr address, size_t size);
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);
/// Updates the watcher count of the pages touching the specified region (delta is 1 to add a watcher, -1 to remove one).
template <s32 delta>
void UpdatePageWatchers(VAddr addr, u64 size) const;
static VAddr GetPageAddr(VAddr addr);
static VAddr GetNextPageAddr(VAddr addr);
/// Returns addr aligned down to PAGE_SIZE using Common::AlignDown.
/// @param addr Address to align.
static constexpr VAddr GetPageAddr(VAddr addr) {
return Common::AlignDown(addr, PAGE_SIZE);
}
/// Returns the address of the next page: addr + 1 aligned up to PAGE_SIZE using
/// Common::AlignUp. The + 1 makes an already aligned addr advance to the following
/// boundary instead of being returned unchanged.
/// @param addr Address whose following page boundary is requested.
static constexpr VAddr GetNextPageAddr(VAddr addr) {
return Common::AlignUp(addr + 1, PAGE_SIZE);
}
private:
struct Impl;
std::unique_ptr<Impl> impl;
Vulkan::Rasterizer* rasterizer;
boost::icl::interval_map<VAddr, s32> cached_pages;
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
Common::AdaptiveMutex lock;
#else
Common::SpinLock lock;
#endif
};
} // namespace VideoCore

View File

@ -672,7 +672,7 @@ void TextureCache::TrackImage(ImageId image_id) {
// Re-track the whole image
image.track_addr = image_begin;
image.track_addr_end = image_end;
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1);
tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size);
} else {
if (image_begin < image.track_addr) {
TrackImageHead(image_id);
@ -695,7 +695,7 @@ void TextureCache::TrackImageHead(ImageId image_id) {
ASSERT(image.track_addr != 0 && image_begin < image.track_addr);
const auto size = image.track_addr - image_begin;
image.track_addr = image_begin;
tracker.UpdatePagesCachedCount(image_begin, size, 1);
tracker.UpdatePageWatchers<1>(image_begin, size);
}
void TextureCache::TrackImageTail(ImageId image_id) {
@ -711,7 +711,7 @@ void TextureCache::TrackImageTail(ImageId image_id) {
const auto addr = image.track_addr_end;
const auto size = image_end - image.track_addr_end;
image.track_addr_end = image_end;
tracker.UpdatePagesCachedCount(addr, size, 1);
tracker.UpdatePageWatchers<1>(addr, size);
}
void TextureCache::UntrackImage(ImageId image_id) {
@ -724,7 +724,7 @@ void TextureCache::UntrackImage(ImageId image_id) {
image.track_addr = 0;
image.track_addr_end = 0;
if (size != 0) {
tracker.UpdatePagesCachedCount(addr, size, -1);
tracker.UpdatePageWatchers<-1>(addr, size);
}
}
@ -743,7 +743,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
// Check its hash later.
MarkAsMaybeDirty(image_id, image);
}
tracker.UpdatePagesCachedCount(image_begin, size, -1);
tracker.UpdatePageWatchers<-1>(image_begin, size);
}
void TextureCache::UntrackImageTail(ImageId image_id) {
@ -762,7 +762,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) {
// Check its hash later.
MarkAsMaybeDirty(image_id, image);
}
tracker.UpdatePagesCachedCount(addr, size, -1);
tracker.UpdatePageWatchers<-1>(addr, size);
}
void TextureCache::DeleteImage(ImageId image_id) {