Port page_manager from readbacks-poc

Lander Gallastegi 2025-05-16 00:56:08 +02:00
parent f155ec1663
commit 2f05b23cca
6 changed files with 140 additions and 111 deletions

View File

@@ -36,7 +36,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
                            0, AllFlags, BDA_PAGETABLE_SIZE},
       fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE),
-      memory_tracker{&tracker} {
+      memory_tracker{tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
     Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
                           "BDA Page Table Buffer");

View File

@@ -19,11 +19,11 @@ public:
     static constexpr size_t MANAGER_POOL_SIZE = 32;

 public:
-    explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
+    explicit MemoryTracker(PageManager& tracker_) : tracker{&tracker_} {}
     ~MemoryTracker() = default;

     /// Returns true if a region has been modified from the CPU
-    [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
+    bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages<true>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -31,7 +31,7 @@ public:
     }

     /// Returns true if a region has been modified from the GPU
-    [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
+    bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages<false>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -57,8 +57,7 @@ public:
     }

     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    template <typename Func>
-    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
+    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
         IteratePages<true>(query_cpu_range, query_size,
                            [&func](RegionManager* manager, u64 offset, size_t size) {
                                manager->template ForEachModifiedRange<Type::CPU, true>(
@@ -67,17 +66,12 @@ public:
     }

     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
-    template <bool clear, typename Func>
-    void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
+    template <bool clear>
+    void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
         IteratePages<false>(query_cpu_range, query_size,
                             [&func](RegionManager* manager, u64 offset, size_t size) {
-                                if constexpr (clear) {
-                                    manager->template ForEachModifiedRange<Type::GPU, true>(
-                                        manager->GetCpuAddr() + offset, size, func);
-                                } else {
-                                    manager->template ForEachModifiedRange<Type::GPU, false>(
-                                        manager->GetCpuAddr() + offset, size, func);
-                                }
+                                manager->template ForEachModifiedRange<Type::GPU, clear>(
+                                    manager->GetCpuAddr() + offset, size, func);
                             });
     }
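For context, the hunks above replace the `template <typename Func>` helpers with C++20 abbreviated templates (`auto&&` callbacks) and forward the compile-time `clear` flag straight into the callee's template argument list, removing the duplicated `if constexpr` branches. A minimal standalone sketch of that pattern (illustrative only; `RegionManager` and `ForEachDownloadRange` here are simplified stand-ins, not the real shadPS4 types):

// Standalone sketch (not from the commit): forward a compile-time flag directly
// as a template argument instead of branching on it with if constexpr, and take
// the callback as a C++20 'auto&&' parameter.
#include <cstdio>

struct RegionManager {
    template <bool clear>
    void ForEachModifiedRange(int begin, int size, auto&& func) {
        func(begin, size);
        if constexpr (clear) {
            std::printf("cleared [%d, %d)\n", begin, begin + size);
        }
    }
};

template <bool clear>
void ForEachDownloadRange(RegionManager& manager, int begin, int size, auto&& func) {
    // 'clear' goes straight through; the caller no longer needs two nearly
    // identical call sites guarded by if constexpr.
    manager.ForEachModifiedRange<clear>(begin, size, func);
}

int main() {
    RegionManager manager;
    const auto print = [](int begin, int size) {
        std::printf("modified [%d, %d)\n", begin, begin + size);
    };
    ForEachDownloadRange<true>(manager, 0, 16, print);
    ForEachDownloadRange<false>(manager, 16, 16, print);
}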

View File

@@ -10,8 +10,9 @@

 #ifdef __linux__
 #include "common/adaptive_mutex.h"
-#endif
+#else
 #include "common/spin_lock.h"
+#endif

 #include "common/types.h"
 #include "video_core/page_manager.h"
@@ -56,7 +57,7 @@ public:
         return cpu_addr;
     }

-    static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
+    static constexpr u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
         constexpr size_t number_bits = sizeof(u64) * 8;
         const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
         u64 bits = (word >> page_start) << page_start;
@@ -64,7 +65,7 @@ public:
         return bits;
     }

-    static std::pair<size_t, size_t> GetWordPage(VAddr address) {
+    static constexpr std::pair<size_t, size_t> GetWordPage(VAddr address) {
         const size_t converted_address = static_cast<size_t>(address);
         const size_t word_number = converted_address / BYTES_PER_WORD;
         const size_t amount_pages = converted_address % BYTES_PER_WORD;
@@ -104,13 +105,12 @@ public:
         }
     }

-    template <typename Func>
-    void IteratePages(u64 mask, Func&& func) const {
+    void IteratePages(u64 mask, auto&& func) const {
         size_t offset = 0;
         while (mask != 0) {
             const size_t empty_bits = std::countr_zero(mask);
             offset += empty_bits;
-            mask = mask >> empty_bits;
+            mask >>= empty_bits;

             const size_t continuous_bits = std::countr_one(mask);
             func(offset, continuous_bits);
@@ -155,8 +155,8 @@ public:
      * @param size Size in bytes of the CPU range to loop over
      * @param func Function to call for each turned off region
      */
-    template <Type type, bool clear, typename Func>
-    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+    template <Type type, bool clear>
+    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
         std::scoped_lock lk{lock};
         static_assert(type != Type::Untracked);
@@ -177,11 +177,9 @@ public:
             if constexpr (clear) {
                 if constexpr (type == Type::CPU) {
                     UpdateProtection<true>(index, untracked[index], mask);
-                }
-                state_words[index] &= ~mask;
-                if constexpr (type == Type::CPU) {
                     untracked[index] &= ~mask;
                 }
+                state_words[index] &= ~mask;
             }
             const size_t base_offset = index * PAGES_PER_WORD;
             IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
@@ -245,11 +243,12 @@ private:
      */
     template <bool add_to_tracker>
     void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
+        constexpr s32 delta = add_to_tracker ? 1 : -1;
         u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
         IteratePages(changed_bits, [&](size_t offset, size_t size) {
-            tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
-                                            add_to_tracker ? 1 : -1);
+            tracker->UpdatePageWatchers<delta>(addr + offset * BYTES_PER_PAGE,
+                                               size * BYTES_PER_PAGE);
         });
     }

View File

@@ -1,11 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <thread>
 #include <boost/icl/interval_set.hpp>
-#include "common/alignment.h"
 #include "common/assert.h"
-#include "common/error.h"
 #include "common/signal_context.h"
 #include "core/memory.h"
 #include "core/signals.h"
@@ -15,8 +12,10 @@
 #ifndef _WIN64
 #include <sys/mman.h>
 #ifdef ENABLE_USERFAULTFD
+#include <thread>
 #include <fcntl.h>
 #include <linux/userfaultfd.h>
+#include "common/error.h"
 #include <poll.h>
 #include <sys/ioctl.h>
 #endif
@@ -24,14 +23,43 @@
 #include <windows.h>
 #endif

+#ifdef __linux__
+#include "common/adaptive_mutex.h"
+#else
+#include "common/spin_lock.h"
+#endif
+
 namespace VideoCore {

-constexpr size_t PAGESIZE = 4_KB;
-constexpr size_t PAGEBITS = 12;
+constexpr size_t PAGE_SIZE = 4_KB;
+constexpr size_t PAGE_BITS = 12;

-#ifdef ENABLE_USERFAULTFD
 struct PageManager::Impl {
-    Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
+    struct PageState {
+        u8 num_watchers{};
+
+        Core::MemoryPermission Perm() const noexcept {
+            return num_watchers == 0 ? Core::MemoryPermission::ReadWrite
+                                     : Core::MemoryPermission::Read;
+        }
+
+        template <s32 delta>
+        u8 AddDelta() {
+            if constexpr (delta == 1) {
+                return ++num_watchers;
+            } else {
+                ASSERT_MSG(num_watchers > 0, "Not enough watchers");
+                return --num_watchers;
+            }
+        }
+    };
+
+    static constexpr size_t ADDRESS_BITS = 40;
+    static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS);
+
+    inline static Vulkan::Rasterizer* rasterizer;
+
+#ifdef ENABLE_USERFAULTFD
+    Impl(Vulkan::Rasterizer* rasterizer_) {
+        rasterizer = rasterizer_;
         uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
         ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg());
@@ -63,7 +91,8 @@ struct PageManager::Impl {
         ASSERT_MSG(ret != -1, "Uffdio unregister failed");
     }

-    void Protect(VAddr address, size_t size, bool allow_write) {
+    void Protect(VAddr address, size_t size, Core::MemoryPermission perms) {
+        bool allow_write = True(perms & Core::MemoryPermission::Write);
         uffdio_writeprotect wp;
         wp.range.start = address;
         wp.range.len = size;
@@ -118,12 +147,9 @@ struct PageManager::Impl {
         }
     }

-    Vulkan::Rasterizer* rasterizer;
     std::jthread ufd_thread;
     int uffd;
-};

 #else
-struct PageManager::Impl {
     Impl(Vulkan::Rasterizer* rasterizer_) {
         rasterizer = rasterizer_;
@@ -141,12 +167,10 @@ struct PageManager::Impl {
         // No-op
     }

-    void Protect(VAddr address, size_t size, bool allow_write) {
+    void Protect(VAddr address, size_t size, Core::MemoryPermission perms) {
         auto* memory = Core::Memory::Instance();
         auto& impl = memory->GetAddressSpace();
-        impl.Protect(address, size,
-                     allow_write ? Core::MemoryPermission::ReadWrite
-                                 : Core::MemoryPermission::Read);
+        impl.Protect(address, size, perms);
     }
@@ -157,23 +181,65 @@ struct PageManager::Impl {
         return false;
     }

-    inline static Vulkan::Rasterizer* rasterizer;
-};
 #endif
+
+    template <s32 delta>
+    void UpdatePageWatchers(VAddr addr, u64 size) {
+        std::scoped_lock lk(lock);
+        size_t page = addr >> PAGE_BITS;
+        auto perms = cached_pages[page].Perm();
+        u64 range_begin = 0;
+        u64 range_bytes = 0;
+
+        const auto release_pending = [&] {
+            if (range_bytes > 0) {
+                Protect(range_begin << PAGE_BITS, range_bytes, perms);
+                range_bytes = 0;
+            }
+        };
+
+        // Iterate requested pages
+        const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+        for (; page != page_end; ++page) {
+            PageState& state = cached_pages[page];
+
+            // Apply the change to the page state
+            const u8 new_count = state.AddDelta<delta>();
+
+            // If the protection changed flush pending (un)protect action
+            if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
+                release_pending();
+                perms = new_perms;
+            }
+
+            // If the page must be (un)protected, add it to the pending range
+            if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
+                if (range_bytes == 0) {
+                    range_begin = page;
+                }
+                range_bytes += PAGE_SIZE;
+            } else {
+                release_pending();
+            }
+        }

+        // Flush pending (un)protect action
+        release_pending();
+    }
+
+    std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
+#ifdef __linux__
+    Common::AdaptiveMutex lock;
+#else
+    Common::SpinLock lock;
+#endif
+};

 PageManager::PageManager(Vulkan::Rasterizer* rasterizer_)
-    : impl{std::make_unique<Impl>(rasterizer_)}, rasterizer{rasterizer_} {}
+    : impl{std::make_unique<Impl>(rasterizer_)} {}

 PageManager::~PageManager() = default;

-VAddr PageManager::GetPageAddr(VAddr addr) {
-    return Common::AlignDown(addr, PAGESIZE);
-}
-
-VAddr PageManager::GetNextPageAddr(VAddr addr) {
-    return Common::AlignUp(addr + 1, PAGESIZE);
-}
-
 void PageManager::OnGpuMap(VAddr address, size_t size) {
     impl->OnMap(address, size);
 }
@@ -182,41 +248,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
     impl->OnUnmap(address, size);
 }

-void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
-    static constexpr u64 PageShift = 12;
-
-    std::scoped_lock lk{lock};
-    const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
-    const u64 page_start = addr >> PageShift;
-    const u64 page_end = page_start + num_pages;
-
-    const auto pages_interval =
-        decltype(cached_pages)::interval_type::right_open(page_start, page_end);
-    if (delta > 0) {
-        cached_pages.add({pages_interval, delta});
-    }
-
-    const auto& range = cached_pages.equal_range(pages_interval);
-    for (const auto& [range, count] : boost::make_iterator_range(range)) {
-        const auto interval = range & pages_interval;
-        const VAddr interval_start_addr = boost::icl::first(interval) << PageShift;
-        const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift;
-        const u32 interval_size = interval_end_addr - interval_start_addr;
-        ASSERT_MSG(rasterizer->IsMapped(interval_start_addr, interval_size),
-                   "Attempted to track non-GPU memory at address {:#x}, size {:#x}.",
-                   interval_start_addr, interval_size);
-        if (delta > 0 && count == delta) {
-            impl->Protect(interval_start_addr, interval_size, false);
-        } else if (delta < 0 && count == -delta) {
-            impl->Protect(interval_start_addr, interval_size, true);
-        } else {
-            ASSERT(count >= 0);
-        }
-    }
-
-    if (delta < 0) {
-        cached_pages.add({pages_interval, delta});
-    }
+template <s32 delta>
+void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
+    impl->UpdatePageWatchers<delta>(addr, size);
 }

+template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const;
+template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const;
+
 } // namespace VideoCore
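To make the watcher logic above easier to follow, here is a self-contained, simplified sketch of the same delta-counting idea (illustrative only: no locking, no rasterizer or userfaultfd, and Protect just prints). Pages whose watcher count crosses the 0/1 boundary are batched into contiguous ranges so protection is changed once per range rather than once per page:

// Standalone sketch of the delta-based page watcher counting (not the real
// shadPS4 implementation). The counts model how many cache entries watch a page.
#include <array>
#include <cstdint>
#include <cstdio>

constexpr std::size_t PAGE_BITS = 12;
constexpr std::size_t PAGE_SIZE = 1ULL << PAGE_BITS;

enum class Perm { ReadWrite, Read };

struct PageState {
    std::uint8_t num_watchers{};
    Perm Permission() const {
        return num_watchers == 0 ? Perm::ReadWrite : Perm::Read;
    }
};

std::array<PageState, 1024> cached_pages{};

void Protect(std::uint64_t addr, std::uint64_t size, Perm perm) {
    std::printf("Protect(addr=%#llx, size=%#llx, %s)\n",
                static_cast<unsigned long long>(addr),
                static_cast<unsigned long long>(size),
                perm == Perm::Read ? "Read" : "ReadWrite");
}

template <int delta>
void UpdatePageWatchers(std::uint64_t addr, std::uint64_t size) {
    std::uint64_t page = addr >> PAGE_BITS;
    const std::uint64_t page_end = (addr + size + PAGE_SIZE - 1) >> PAGE_BITS; // DivCeil
    Perm perms = cached_pages[page].Permission();
    std::uint64_t range_begin = 0;
    std::uint64_t range_bytes = 0;

    const auto release_pending = [&] {
        if (range_bytes > 0) {
            Protect(range_begin << PAGE_BITS, range_bytes, perms);
            range_bytes = 0;
        }
    };

    for (; page != page_end; ++page) {
        PageState& state = cached_pages[page];
        // The real code asserts the count never underflows on delta == -1.
        const std::uint8_t new_count =
            delta > 0 ? ++state.num_watchers : --state.num_watchers;

        // A different permission means the pending range can no longer be merged.
        if (const Perm new_perms = state.Permission(); new_perms != perms) {
            release_pending();
            perms = new_perms;
        }

        // Only pages crossing the 0 <-> 1 boundary actually need a protection change.
        if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
            if (range_bytes == 0) {
                range_begin = page;
            }
            range_bytes += PAGE_SIZE;
        } else {
            release_pending();
        }
    }
    release_pending();
}

int main() {
    UpdatePageWatchers<1>(0x0000, 4 * PAGE_SIZE);  // one Protect(Read) over pages 0-3
    UpdatePageWatchers<1>(0x0000, 2 * PAGE_SIZE);  // counts go to 2, no Protect call
    UpdatePageWatchers<-1>(0x0000, 4 * PAGE_SIZE); // one Protect(ReadWrite) over pages 2-3
}

Running the sketch produces one Protect call per contiguous range, mirroring how the implementation above coalesces (un)protect requests before touching the address space.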

View File

@@ -4,11 +4,7 @@
 #pragma once

 #include <memory>
-#include <boost/icl/interval_map.hpp>
-#ifdef __linux__
-#include "common/adaptive_mutex.h"
-#endif
-#include "common/spin_lock.h"
+#include "common/alignment.h"
 #include "common/types.h"

 namespace Vulkan {
@@ -18,6 +14,8 @@ class Rasterizer;
 namespace VideoCore {

 class PageManager {
+    static constexpr size_t PAGE_BITS = 12;
+    static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS;
 public:
     explicit PageManager(Vulkan::Rasterizer* rasterizer);
     ~PageManager();
@@ -28,22 +26,23 @@ public:
     /// Unregister a range of gpu memory that was unmapped.
     void OnGpuUnmap(VAddr address, size_t size);

-    /// Increase/decrease the number of surface in pages touching the specified region
-    void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);
+    /// Updates watches in the pages touching the specified region.
+    template <s32 delta>
+    void UpdatePageWatchers(VAddr addr, u64 size) const;

-    static VAddr GetPageAddr(VAddr addr);
-    static VAddr GetNextPageAddr(VAddr addr);
+    /// Returns page aligned address.
+    static constexpr VAddr GetPageAddr(VAddr addr) {
+        return Common::AlignDown(addr, PAGE_SIZE);
+    }
+
+    /// Returns address of the next page.
+    static constexpr VAddr GetNextPageAddr(VAddr addr) {
+        return Common::AlignUp(addr + 1, PAGE_SIZE);
+    }

 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
-    Vulkan::Rasterizer* rasterizer;
-    boost::icl::interval_map<VAddr, s32> cached_pages;
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-    Common::AdaptiveMutex lock;
-#else
-    Common::SpinLock lock;
-#endif
 };

 } // namespace VideoCore

View File

@@ -672,7 +672,7 @@ void TextureCache::TrackImage(ImageId image_id) {
         // Re-track the whole image
         image.track_addr = image_begin;
         image.track_addr_end = image_end;
-        tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1);
+        tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size);
     } else {
         if (image_begin < image.track_addr) {
             TrackImageHead(image_id);
@@ -695,7 +695,7 @@ void TextureCache::TrackImageHead(ImageId image_id) {
     ASSERT(image.track_addr != 0 && image_begin < image.track_addr);
     const auto size = image.track_addr - image_begin;
     image.track_addr = image_begin;
-    tracker.UpdatePagesCachedCount(image_begin, size, 1);
+    tracker.UpdatePageWatchers<1>(image_begin, size);
 }

 void TextureCache::TrackImageTail(ImageId image_id) {
@@ -711,7 +711,7 @@ void TextureCache::TrackImageTail(ImageId image_id) {
     const auto addr = image.track_addr_end;
     const auto size = image_end - image.track_addr_end;
     image.track_addr_end = image_end;
-    tracker.UpdatePagesCachedCount(addr, size, 1);
+    tracker.UpdatePageWatchers<1>(addr, size);
 }

 void TextureCache::UntrackImage(ImageId image_id) {
@@ -724,7 +724,7 @@ void TextureCache::UntrackImage(ImageId image_id) {
     image.track_addr = 0;
     image.track_addr_end = 0;
     if (size != 0) {
-        tracker.UpdatePagesCachedCount(addr, size, -1);
+        tracker.UpdatePageWatchers<-1>(addr, size);
     }
 }
@@ -743,7 +743,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
         // Cehck its hash later.
         MarkAsMaybeDirty(image_id, image);
     }
-    tracker.UpdatePagesCachedCount(image_begin, size, -1);
+    tracker.UpdatePageWatchers<-1>(image_begin, size);
 }

 void TextureCache::UntrackImageTail(ImageId image_id) {
@@ -762,7 +762,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) {
         // Cehck its hash later.
         MarkAsMaybeDirty(image_id, image);
     }
-    tracker.UpdatePagesCachedCount(addr, size, -1);
+    tracker.UpdatePageWatchers<-1>(addr, size);
 }

 void TextureCache::DeleteImage(ImageId image_id) {