mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 18:45:36 +00:00
Batched protect progress
This commit is contained in:
parent
0a11c48693
commit
cfad5c9596
@ -914,9 +914,10 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||||||
src/video_core/buffer_cache/buffer.h
|
src/video_core/buffer_cache/buffer.h
|
||||||
src/video_core/buffer_cache/buffer_cache.cpp
|
src/video_core/buffer_cache/buffer_cache.cpp
|
||||||
src/video_core/buffer_cache/buffer_cache.h
|
src/video_core/buffer_cache/buffer_cache.h
|
||||||
src/video_core/buffer_cache/memory_tracker_base.h
|
src/video_core/buffer_cache/memory_tracker.h
|
||||||
src/video_core/buffer_cache/range_set.h
|
src/video_core/buffer_cache/range_set.h
|
||||||
src/video_core/buffer_cache/word_manager.h
|
src/video_core/buffer_cache/region_definitions.h
|
||||||
|
src/video_core/buffer_cache/region_manager.h
|
||||||
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
|
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
|
||||||
src/video_core/renderer_vulkan/liverpool_to_vk.h
|
src/video_core/renderer_vulkan/liverpool_to_vk.h
|
||||||
src/video_core/renderer_vulkan/vk_common.cpp
|
src/video_core/renderer_vulkan/vk_common.cpp
|
||||||
|
@ -223,6 +223,76 @@ public:
|
|||||||
return {N, N};
|
return {N, N};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline constexpr Range FirstRange() const {
|
||||||
|
return FirstRangeFrom(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Range LastRegionFrom(size_t end) const {
|
||||||
|
if (end >= N) {
|
||||||
|
return {N, N};
|
||||||
|
}
|
||||||
|
if (end == 0) {
|
||||||
|
return {0, 0};
|
||||||
|
}
|
||||||
|
const auto find_start_bit = [&](size_t word) {
|
||||||
|
#ifdef BIT_ARRAY_USE_AVX
|
||||||
|
const __m256i all_zero = _mm256_setzero_si256();
|
||||||
|
for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) {
|
||||||
|
const __m256i current =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(&data[word - WORDS_PER_AVX]));
|
||||||
|
const __m256i cmp = _mm256_cmpeq_epi64(current, all_zero);
|
||||||
|
if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; word > 0; --word) {
|
||||||
|
if (data[word - 1] != 0) {
|
||||||
|
return word * BITS_PER_WORD - std::countl_zero(data[word - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return size_t(0);
|
||||||
|
};
|
||||||
|
const auto word_bits = [&](size_t index, u64 word) {
|
||||||
|
const int empty_bits = std::countl_zero(word);
|
||||||
|
const int ones_count = std::countl_one(word << empty_bits);
|
||||||
|
const size_t end_bit = index * BITS_PER_WORD - empty_bits;
|
||||||
|
if (empty_bits + ones_count < BITS_PER_WORD) {
|
||||||
|
return Range{end_bit - ones_count, end_bit};
|
||||||
|
}
|
||||||
|
return Range{find_start_bit(index - 1), end_bit};
|
||||||
|
};
|
||||||
|
const size_t end_word = (end - 1) / BITS_PER_WORD;
|
||||||
|
const size_t end_bit = (end - 1) % BITS_PER_WORD;
|
||||||
|
u64 masked_last = data[end_word];
|
||||||
|
if (end_bit < BITS_PER_WORD - 1) {
|
||||||
|
masked_last &= (1ULL << (end_bit + 1)) - 1;
|
||||||
|
}
|
||||||
|
if (masked_last) {
|
||||||
|
return word_bits(end_word, masked_last);
|
||||||
|
}
|
||||||
|
size_t word = end_word - 1;
|
||||||
|
#ifdef BIT_ARRAY_USE_AVX
|
||||||
|
for (; word >= WORDS_PER_AVX; word -= WORDS_PER_AVX) {
|
||||||
|
const __m256i current =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(&data[word - WORDS_PER_AVX]));
|
||||||
|
if (!_mm256_testz_si256(current, current)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; word > 0; --word) {
|
||||||
|
if (data[word - 1] != 0) {
|
||||||
|
return word_bits(word, data[word - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {0, 0};
|
||||||
|
}
|
||||||
|
|
||||||
|
inline constexpr Range LastRegion() const {
|
||||||
|
return LastRegionFrom(0);
|
||||||
|
}
|
||||||
|
|
||||||
inline constexpr size_t Size() const {
|
inline constexpr size_t Size() const {
|
||||||
return N;
|
return N;
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include "common/slot_vector.h"
|
#include "common/slot_vector.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/buffer_cache/buffer.h"
|
#include "video_core/buffer_cache/buffer.h"
|
||||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
#include "video_core/buffer_cache/memory_tracker.h"
|
||||||
#include "video_core/buffer_cache/range_set.h"
|
#include "video_core/buffer_cache/range_set.h"
|
||||||
#include "video_core/multi_level_page_table.h"
|
#include "video_core/multi_level_page_table.h"
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/buffer_cache/word_manager.h"
|
#include "video_core/buffer_cache/region_manager.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
31
src/video_core/buffer_cache/region_definitions.h
Normal file
31
src/video_core/buffer_cache/region_definitions.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include "common/bit_array.h"
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
constexpr u64 PAGES_PER_WORD = 64;
|
||||||
|
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
||||||
|
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
||||||
|
|
||||||
|
constexpr u64 HIGHER_PAGE_BITS = 22;
|
||||||
|
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
||||||
|
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
||||||
|
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
||||||
|
|
||||||
|
enum class Type {
|
||||||
|
CPU,
|
||||||
|
GPU,
|
||||||
|
Untracked,
|
||||||
|
};
|
||||||
|
|
||||||
|
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
|
||||||
|
// TODO: use this insteed of WordsArray once it is ready
|
||||||
|
using RegionBits = Common::BitArray<NUM_REGION_WORDS * PAGES_PER_WORD>;
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -16,26 +15,10 @@
|
|||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/page_manager.h"
|
#include "video_core/page_manager.h"
|
||||||
|
#include "video_core/buffer_cache/region_definitions.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
constexpr u64 PAGES_PER_WORD = 64;
|
|
||||||
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
|
||||||
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
|
||||||
|
|
||||||
constexpr u64 HIGHER_PAGE_BITS = 22;
|
|
||||||
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
|
||||||
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
|
||||||
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
|
||||||
|
|
||||||
enum class Type {
|
|
||||||
CPU,
|
|
||||||
GPU,
|
|
||||||
Untracked,
|
|
||||||
};
|
|
||||||
|
|
||||||
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region.
|
* Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region.
|
||||||
* Information is stored in bitsets for spacial locality and fast update of single pages.
|
* Information is stored in bitsets for spacial locality and fast update of single pages.
|
||||||
@ -250,12 +233,11 @@ private:
|
|||||||
template <bool add_to_tracker>
|
template <bool add_to_tracker>
|
||||||
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
|
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||||
RENDERER_TRACE;
|
RENDERER_TRACE;
|
||||||
constexpr s32 delta = add_to_tracker ? 1 : -1;
|
|
||||||
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
||||||
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
||||||
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
||||||
tracker->UpdatePageWatchers<delta>(addr + offset * BYTES_PER_PAGE,
|
tracker->UpdatePageWatchers<add_to_tracker>(addr + offset * BYTES_PER_PAGE,
|
||||||
size * BYTES_PER_PAGE);
|
size * BYTES_PER_PAGE);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -190,7 +190,7 @@ struct PageManager::Impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
template <s32 delta>
|
template <bool track>
|
||||||
void UpdatePageWatchers(VAddr addr, u64 size) {
|
void UpdatePageWatchers(VAddr addr, u64 size) {
|
||||||
RENDERER_TRACE;
|
RENDERER_TRACE;
|
||||||
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
|
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
|
||||||
@ -223,7 +223,7 @@ struct PageManager::Impl {
|
|||||||
PageState& state = cached_pages[page];
|
PageState& state = cached_pages[page];
|
||||||
|
|
||||||
// Apply the change to the page state
|
// Apply the change to the page state
|
||||||
const u8 new_count = state.AddDelta<delta>();
|
const u8 new_count = state.AddDelta<track ? 1 : -1>();
|
||||||
|
|
||||||
// If the protection changed add pending (un)protect action
|
// If the protection changed add pending (un)protect action
|
||||||
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
|
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
|
||||||
@ -232,7 +232,7 @@ struct PageManager::Impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If the page must be (un)protected, add it to the pending range
|
// If the page must be (un)protected, add it to the pending range
|
||||||
if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
|
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
|
||||||
if (range_bytes == 0) {
|
if (range_bytes == 0) {
|
||||||
range_begin = page;
|
range_begin = page;
|
||||||
}
|
}
|
||||||
@ -252,6 +252,22 @@ struct PageManager::Impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool track>
|
||||||
|
void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) {
|
||||||
|
RENDERER_TRACE;
|
||||||
|
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(lock);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush deferred protects
|
||||||
|
for (const auto& range : update_ranges) {
|
||||||
|
Protect(range.addr, range.size, range.perms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
|
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
Common::AdaptiveMutex lock;
|
Common::AdaptiveMutex lock;
|
||||||
@ -273,12 +289,12 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
|
|||||||
impl->OnUnmap(address, size);
|
impl->OnUnmap(address, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <s32 delta>
|
template <bool track>
|
||||||
void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
|
void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
|
||||||
impl->UpdatePageWatchers<delta>(addr, size);
|
impl->UpdatePageWatchers<track>(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const;
|
template void PageManager::UpdatePageWatchers<true>(VAddr addr, u64 size) const;
|
||||||
template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const;
|
template void PageManager::UpdatePageWatchers<false>(VAddr addr, u64 size) const;
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "video_core/buffer_cache//region_definitions.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
class Rasterizer;
|
class Rasterizer;
|
||||||
@ -28,9 +29,14 @@ public:
|
|||||||
void OnGpuUnmap(VAddr address, size_t size);
|
void OnGpuUnmap(VAddr address, size_t size);
|
||||||
|
|
||||||
/// Updates watches in the pages touching the specified region.
|
/// Updates watches in the pages touching the specified region.
|
||||||
template <s32 delta>
|
template <bool track>
|
||||||
void UpdatePageWatchers(VAddr addr, u64 size) const;
|
void UpdatePageWatchers(VAddr addr, u64 size) const;
|
||||||
|
|
||||||
|
/// Updates watches in the pages touching the specified region
|
||||||
|
/// using a mask.
|
||||||
|
template <bool track>
|
||||||
|
void UpdatePageWatchersMasked(VAddr addr, RegionBits& mask) const;
|
||||||
|
|
||||||
/// Returns page aligned address.
|
/// Returns page aligned address.
|
||||||
static constexpr VAddr GetPageAddr(VAddr addr) {
|
static constexpr VAddr GetPageAddr(VAddr addr) {
|
||||||
return Common::AlignDown(addr, PAGE_SIZE);
|
return Common::AlignDown(addr, PAGE_SIZE);
|
||||||
|
@ -761,7 +761,7 @@ void TextureCache::UntrackImage(ImageId image_id) {
|
|||||||
image.track_addr = 0;
|
image.track_addr = 0;
|
||||||
image.track_addr_end = 0;
|
image.track_addr_end = 0;
|
||||||
if (size != 0) {
|
if (size != 0) {
|
||||||
tracker.UpdatePageWatchers<-1>(addr, size);
|
tracker.UpdatePageWatchers<false>(addr, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -780,7 +780,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
|
|||||||
// Cehck its hash later.
|
// Cehck its hash later.
|
||||||
MarkAsMaybeDirty(image_id, image);
|
MarkAsMaybeDirty(image_id, image);
|
||||||
}
|
}
|
||||||
tracker.UpdatePageWatchers<-1>(image_begin, size);
|
tracker.UpdatePageWatchers<false>(image_begin, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::UntrackImageTail(ImageId image_id) {
|
void TextureCache::UntrackImageTail(ImageId image_id) {
|
||||||
@ -799,7 +799,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) {
|
|||||||
// Cehck its hash later.
|
// Cehck its hash later.
|
||||||
MarkAsMaybeDirty(image_id, image);
|
MarkAsMaybeDirty(image_id, image);
|
||||||
}
|
}
|
||||||
tracker.UpdatePageWatchers<-1>(addr, size);
|
tracker.UpdatePageWatchers<false>(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::DeleteImage(ImageId image_id) {
|
void TextureCache::DeleteImage(ImageId image_id) {
|
||||||
|
Loading…
Reference in New Issue
Block a user