mirror of https://github.com/shadps4-emu/shadPS4.git

commit bd31f1d12c — Faster syncing with ranges
parent c82c165b29

@@ -3,6 +3,7 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/debug.h"
#include "common/scope_exit.h"
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
@@ -131,11 +132,22 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s

BufferCache::~BufferCache() = default;

void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
    const bool is_tracked = IsRegionRegistered(device_addr, size);
    if (is_tracked) {
        // Mark the page as CPU modified to stop tracking writes.
        memory_tracker.MarkRegionAsCpuModified(device_addr, size);

        if (unmap) {
            return;
        }

        {
            std::scoped_lock lock(dma_sync_ranges_mutex);
            const VAddr page_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE);
            const u64 page_size = Common::AlignUp(size, CACHING_PAGESIZE);
            dma_sync_ranges.Add(device_addr, size);
        }
    }
}
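For context: the pattern this hunk introduces is to record the touched range under dma_sync_ranges_mutex and to drain the accumulated ranges later in a single pass (see SynchronizeDmaBuffers further down). A minimal sketch of that record-then-drain idea, assuming only boost::icl and the standard library; PendingRanges and its members are illustrative names, not shadPS4 code:

// Sketch only (not part of the commit): accumulate ranges under a lock,
// then drain them in one pass.
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <mutex>

using VAddr = std::uint64_t;

class PendingRanges {
public:
    void Add(VAddr addr, std::uint64_t size) {
        std::scoped_lock lk{mutex};
        ranges += boost::icl::interval<VAddr>::right_open(addr, addr + size);
    }

    template <typename Func>
    void Drain(Func&& func) {
        std::scoped_lock lk{mutex};
        for (const auto& iv : ranges) {
            func(iv.lower(), iv.upper() - iv.lower()); // (start, size)
        }
        ranges.clear();
    }

private:
    std::mutex mutex;
    boost::icl::interval_set<VAddr> ranges;
};
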
@@ -371,24 +383,10 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
}

bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
    const VAddr end_addr = addr + size;
    const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
    for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
        const BufferId buffer_id = page_table[page].buffer_id;
        if (!buffer_id) {
            ++page;
            continue;
        }
        std::shared_lock lk{slot_buffers_mutex};
        Buffer& buffer = slot_buffers[buffer_id];
        const VAddr buf_start_addr = buffer.CpuAddr();
        const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
        if (buf_start_addr < end_addr && addr < buf_end_addr) {
            return true;
        }
        page = Common::DivCeil(buf_end_addr, CACHING_PAGESIZE);
    }
    return false;
    // Check if we are missing some edge case here
    const u64 page = addr >> CACHING_PAGEBITS;
    const u64 page_size = Common::DivCeil(size, CACHING_PAGESIZE);
    return buffer_ranges.Intersects(page, page_size);
}

bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
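The rewritten IsRegionRegistered above replaces the page-table walk with a single interval intersection query. A minimal sketch of that kind of query, assuming default boost::icl container traits rather than the RangeSetsAllocator-based aliases used in the actual header; the page numbers are made up:

// Sketch only (not part of the commit): answer "is anything registered in
// this page range?" with one intersection test.
#include <boost/icl/split_interval_map.hpp>
#include <cstdint>

using VAddr = std::uint64_t;
using BufferId = std::uint32_t;

int main() {
    boost::icl::split_interval_map<VAddr, BufferId> pages;
    // Pages [16, 32) belong to buffer 7.
    pages.add(std::make_pair(boost::icl::interval<VAddr>::right_open(16, 32), BufferId{7}));

    // Does the page range [30, 40) touch any registered buffer? -> yes
    const auto query = boost::icl::interval<VAddr>::right_open(30, 40);
    return boost::icl::intersects(pages, query) ? 0 : 1;
}
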
@@ -577,6 +575,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
    }
    Register(new_buffer_id);
    {
        std::scoped_lock lk(dma_sync_ranges_mutex);
        dma_sync_ranges.Add(overlap.begin, overlap.end);
    }
    return new_buffer_id;
}
@@ -704,7 +706,6 @@ void BufferCache::ProcessFaultBuffer() {
            // Only create a buffer if the current range doesn't fit in an existing one
            FindBuffer(start, static_cast<u32>(end - start));
        }
        rasterizer.AddDmaSyncRanges(fault_ranges);
    });
}
@@ -731,6 +732,11 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
            page_table[page].buffer_id = BufferId{};
        }
    }
    if constexpr (insert) {
        buffer_ranges.Add(page_begin, page_end - page_begin, buffer_id);
    } else {
        buffer_ranges.Subtract(page_begin, page_end - page_begin);
    }
}

void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
@@ -915,6 +921,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
    }
    VAddr device_addr_end = device_addr + size;
    ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
        RENDERER_TRACE;
        // Note that this function synchronizes the whole buffer, not just the range.
        // This is because this function is used to sync buffers before using a
        // shader that uses DMA.
@@ -924,6 +931,16 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
    });
}

void BufferCache::SynchronizeDmaBuffers() {
    RENDERER_TRACE;
    std::scoped_lock lk(dma_sync_ranges_mutex);
    dma_sync_ranges.ForEach([&](VAddr device_addr, u64 end_addr) {
        RENDERER_TRACE;
        SynchronizeBuffersInRange(device_addr, end_addr - device_addr);
    });
    dma_sync_ranges.Clear();
}

void BufferCache::MemoryBarrier() {
    // Vulkan doesn't know which buffer we access in a shader if we use
    // BufferDeviceAddress. We need a full memory barrier.

@@ -98,7 +98,7 @@ public:
    }

    /// Invalidates any buffer in the logical page range.
    void InvalidateMemory(VAddr device_addr, u64 size);
    void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);

    /// Binds host vertex buffers for the current draw.
    void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -139,25 +139,21 @@ public:
    /// Synchronizes all buffers in the specified range.
    void SynchronizeBuffersInRange(VAddr device_addr, u64 size);

    /// Synchronizes all buffers needed for DMA.
    void SynchronizeDmaBuffers();

    /// Record memory barrier. Used for buffers when accessed via BDA.
    void MemoryBarrier();

private:
    template <typename Func>
    void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
        const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
        for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
            const BufferId buffer_id = page_table[page].buffer_id;
            if (!buffer_id) {
                ++page;
                continue;
            }
            Buffer& buffer = slot_buffers[buffer_id];
            func(buffer_id, buffer);

            const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
            page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
        }
        const u64 page = device_addr >> CACHING_PAGEBITS;
        const u64 page_size = Common::DivCeil(size, CACHING_PAGESIZE);
        buffer_ranges.ForEachInRange(page, page_size, [&](u64 page_start, u64 page_end, BufferId id) {
            Buffer& buffer = slot_buffers[id];
            func(id, buffer);
        });
    }

    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
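With this hunk, ForEachBufferInRange hands the page window to buffer_ranges.ForEachInRange and receives one callback per registered segment that overlaps it, instead of probing page_table entry by entry. A small sketch of that lookup shape, using the same lower_bound/upper_bound walk as the range-map header shown further down; the map, names, and page numbers here are illustrative, not shadPS4 code:

// Sketch only (not part of the commit): visit each mapped segment that
// overlaps a query window.
#include <boost/icl/split_interval_map.hpp>
#include <cstdint>
#include <cstdio>

using VAddr = std::uint64_t;
using BufferId = std::uint32_t;

int main() {
    boost::icl::split_interval_map<VAddr, BufferId> buffer_pages;
    buffer_pages.add(std::make_pair(boost::icl::interval<VAddr>::right_open(4, 8), BufferId{1}));
    buffer_pages.add(std::make_pair(boost::icl::interval<VAddr>::right_open(12, 20), BufferId{2}));

    // Visit every buffer whose pages overlap the window [6, 14).
    const auto window = boost::icl::interval<VAddr>::right_open(6, 14);
    auto it = buffer_pages.lower_bound(window);
    const auto end = buffer_pages.upper_bound(window);
    for (; it != end; ++it) {
        std::printf("buffer %u overlaps at pages [%llu, %llu)\n", it->second,
                    static_cast<unsigned long long>(it->first.lower()),
                    static_cast<unsigned long long>(it->first.upper()));
    }
    return 0;
}
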
@@ -199,7 +195,10 @@ private:
    Buffer fault_buffer;
    std::shared_mutex slot_buffers_mutex;
    Common::SlotVector<Buffer> slot_buffers;
    std::shared_mutex dma_sync_ranges_mutex;
    RangeSet dma_sync_ranges;
    RangeSet gpu_modified_ranges;
    SplitRangeMap<BufferId> buffer_ranges;
    MemoryTracker memory_tracker;
    PageTable page_table;
    vk::UniqueDescriptorSetLayout fault_process_desc_layout;

@@ -4,6 +4,9 @@
#pragma once

#include <boost/icl/interval_map.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <boost/icl/split_interval_set.hpp>
#include <boost/icl/discrete_interval.hpp>
#include <boost/pool/pool.hpp>
#include <boost/pool/pool_alloc.hpp>
#include <boost/pool/poolfwd.hpp>
@@ -38,6 +41,22 @@ struct RangeSet {
        m_ranges_set.subtract(interval);
    }

    void Clear() {
        m_ranges_set.clear();
    }

    bool Contains(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::contains(m_ranges_set, interval);
    }

    bool Intersects(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::intersects(m_ranges_set, interval);
    }

    template <typename Func>
    void ForEach(Func&& func) const {
        if (m_ranges_set.empty()) {
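RangeSet gains both Contains and Intersects, and with boost::icl they answer different questions: contains() is true only when the whole query interval is covered by the set, while intersects() is true on any overlap. A small sketch with made-up addresses, assuming default boost::icl traits:

// Sketch only (not part of the commit): contains vs. intersects.
#include <boost/icl/interval_set.hpp>
#include <cassert>
#include <cstdint>

using VAddr = std::uint64_t;

int main() {
    boost::icl::interval_set<VAddr> set;
    set += boost::icl::interval<VAddr>::right_open(0x1000, 0x2000);

    const auto partly = boost::icl::interval<VAddr>::right_open(0x1800, 0x2800);
    assert(boost::icl::intersects(set, partly)); // overlaps the tail of the set
    assert(!boost::icl::contains(set, partly));  // but is not fully covered

    const auto inside = boost::icl::interval<VAddr>::right_open(0x1100, 0x1200);
    assert(boost::icl::contains(set, inside));   // fully covered
    return 0;
}
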
@@ -77,14 +96,29 @@ struct RangeSet {
        }
    }

    template <typename Func>
    void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
        const VAddr end_addr = base_addr + size;
        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end) {
            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
                func(base_addr, gap_size);
            }
            base_addr = range_end;
        });
        if (base_addr != end_addr) {
            func(base_addr, end_addr - base_addr);
        }
    }

    IntervalSet m_ranges_set;
};

template <typename T>
class RangeMap {
public:
    using IntervalMap =
        boost::icl::interval_map<VAddr, u64, boost::icl::partial_absorber, std::less,
                                 boost::icl::inplace_plus, boost::icl::inter_section,
        boost::icl::interval_map<VAddr, T, boost::icl::total_absorber, std::less,
                                 boost::icl::inplace_identity, boost::icl::inter_section,
                                 ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
                                 RangeSetsAllocator>;
    using IntervalType = typename IntervalMap::interval_type;
@@ -99,7 +133,7 @@ public:
    RangeMap(RangeMap&& other);
    RangeMap& operator=(RangeMap&& other);

    void Add(VAddr base_address, size_t size, u64 value) {
    void Add(VAddr base_address, size_t size, const T& value) {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        m_ranges_map.add({interval, value});
@@ -111,6 +145,35 @@ public:
        m_ranges_map -= interval;
    }

    void Clear() {
        m_ranges_map.clear();
    }

    bool Contains(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::contains(m_ranges_map, interval);
    }

    bool Intersects(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::intersects(m_ranges_map, interval);
    }

    template <typename Func>
    void ForEach(Func&& func) const {
        if (m_ranges_map.empty()) {
            return;
        }

        for (const auto& [interval, value] : m_ranges_map) {
            const VAddr inter_addr_end = interval.upper();
            const VAddr inter_addr = interval.lower();
            func(inter_addr, inter_addr_end, value);
        }
    }

    template <typename Func>
    void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const {
        if (m_ranges_map.empty()) {
@@ -140,7 +203,112 @@ public:
    template <typename Func>
    void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
        const VAddr end_addr = base_addr + size;
        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, u64) {
        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, const T&) {
            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
                func(base_addr, gap_size);
            }
            base_addr = range_end;
        });
        if (base_addr != end_addr) {
            func(base_addr, end_addr - base_addr);
        }
    }

private:
    IntervalMap m_ranges_map;
};

template <typename T>
class SplitRangeMap {
public:
    using IntervalMap =
        boost::icl::split_interval_map<VAddr, T, boost::icl::total_absorber, std::less,
                                       boost::icl::inplace_identity, boost::icl::inter_section,
                                       ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
                                       RangeSetsAllocator>;
    using IntervalType = typename IntervalMap::interval_type;

public:
    SplitRangeMap() = default;
    ~SplitRangeMap() = default;

    SplitRangeMap(SplitRangeMap const&) = delete;
    SplitRangeMap& operator=(SplitRangeMap const&) = delete;

    SplitRangeMap(SplitRangeMap&& other);
    SplitRangeMap& operator=(SplitRangeMap&& other);

    void Add(VAddr base_address, size_t size, const T& value) {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        m_ranges_map.add({interval, value});
    }

    void Subtract(VAddr base_address, size_t size) {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        m_ranges_map -= interval;
    }

    void Clear() {
        m_ranges_map.clear();
    }

    bool Contains(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::contains(m_ranges_map, interval);
    }

    bool Intersects(VAddr base_address, size_t size) const {
        const VAddr end_address = base_address + size;
        IntervalType interval{base_address, end_address};
        return boost::icl::intersects(m_ranges_map, interval);
    }

    template <typename Func>
    void ForEach(Func&& func) const {
        if (m_ranges_map.empty()) {
            return;
        }

        for (const auto& [interval, value] : m_ranges_map) {
            const VAddr inter_addr_end = interval.upper();
            const VAddr inter_addr = interval.lower();
            func(inter_addr, inter_addr_end, value);
        }
    }

    template <typename Func>
    void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const {
        if (m_ranges_map.empty()) {
            return;
        }
        const VAddr start_address = base_addr;
        const VAddr end_address = start_address + size;
        const IntervalType search_interval{start_address, end_address};
        auto it = m_ranges_map.lower_bound(search_interval);
        if (it == m_ranges_map.end()) {
            return;
        }
        auto end_it = m_ranges_map.upper_bound(search_interval);
        for (; it != end_it; it++) {
            VAddr inter_addr_end = it->first.upper();
            VAddr inter_addr = it->first.lower();
            if (inter_addr_end > end_address) {
                inter_addr_end = end_address;
            }
            if (inter_addr < start_address) {
                inter_addr = start_address;
            }
            func(inter_addr, inter_addr_end, it->second);
        }
    }

    template <typename Func>
    void ForEachNotInRange(VAddr base_addr, size_t size, Func&& func) const {
        const VAddr end_addr = base_addr + size;
        ForEachInRange(base_addr, size, [&](VAddr range_addr, VAddr range_end, const T&) {
            if (size_t gap_size = range_addr - base_addr; gap_size != 0) {
                func(base_addr, gap_size);
            }
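SplitRangeMap is the split_interval_map counterpart of RangeMap above. The documented boost::icl difference is that a plain interval_map joins touching intervals that carry equal values, while a split_interval_map preserves every inserted border; the diff does not state why the split variant was chosen for buffer_ranges, so the sketch below only demonstrates that documented behaviour:

// Sketch only (not part of the commit): joining vs. splitting interval maps.
#include <boost/icl/interval_map.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <cassert>
#include <cstdint>

using VAddr = std::uint64_t;

int main() {
    const auto lo = boost::icl::interval<VAddr>::right_open(0, 10);
    const auto hi = boost::icl::interval<VAddr>::right_open(10, 20);

    boost::icl::interval_map<VAddr, int> joining;
    joining.add({lo, 1});
    joining.add({hi, 1});
    assert(joining.iterative_size() == 1); // merged into a single [0, 20) -> 1

    boost::icl::split_interval_map<VAddr, int> split;
    split.add({lo, 1});
    split.add({hi, 1});
    assert(split.iterative_size() == 2);   // the border at 10 is preserved
    return 0;
}
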
@@ -477,10 +477,9 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
    if (uses_dma) {
        // We only use fault buffer for DMA right now.
        {
            Common::RecursiveSharedLock lock(mapped_ranges_mutex);
            for (const auto& range : dma_sync_mapped_ranges) {
                buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
            }
            // We don't want the mapped ranges to be modified while we are syncing
            Common::RecursiveSharedLock lock{mapped_ranges_mutex};
            buffer_cache.SynchronizeDmaBuffers();
        }
        buffer_cache.MemoryBarrier();
    }
@@ -726,14 +725,6 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
    }
}

void Rasterizer::AddDmaSyncRanges(const boost::icl::interval_set<VAddr>& ranges) {
    dma_sync_ranges += ranges;
    {
        std::scoped_lock lock(mapped_ranges_mutex);
        dma_sync_mapped_ranges = mapped_ranges & dma_sync_ranges;
    }
}

void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
    int cb_index = 0;
    for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) {
@@ -964,7 +955,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
        // Not GPU mapped memory, can skip invalidation logic entirely.
        return false;
    }
    buffer_cache.InvalidateMemory(addr, size);
    buffer_cache.InvalidateMemory(addr, size, false);
    texture_cache.InvalidateMemory(addr, size);
    return true;
}
@@ -984,19 +975,17 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
    {
        std::scoped_lock lock{mapped_ranges_mutex};
        mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
        dma_sync_mapped_ranges = mapped_ranges & dma_sync_ranges;
    }
    page_manager.OnGpuMap(addr, size);
}

void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
    buffer_cache.InvalidateMemory(addr, size);
    buffer_cache.InvalidateMemory(addr, size, true);
    texture_cache.UnmapMemory(addr, size);
    page_manager.OnGpuUnmap(addr, size);
    {
        std::scoped_lock lock{mapped_ranges_mutex};
        mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
        dma_sync_mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
    }
}
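MapMemory and UnmapMemory keep mapped_ranges current with += and -= of right-open intervals; with boost::icl, subtracting a subrange out of the middle of a mapping splits the remainder automatically. A small sketch of that bookkeeping with made-up addresses:

// Sketch only (not part of the commit): interval bookkeeping on map/unmap.
#include <boost/icl/interval_set.hpp>
#include <cassert>
#include <cstdint>

using VAddr = std::uint64_t;

int main() {
    boost::icl::interval_set<VAddr> mapped;
    // Map [0x1000, 0x5000), then unmap the middle range [0x2000, 0x3000).
    mapped += boost::icl::interval<VAddr>::right_open(0x1000, 0x5000);
    mapped -= boost::icl::interval<VAddr>::right_open(0x2000, 0x3000);

    // Two disjoint ranges remain: [0x1000, 0x2000) and [0x3000, 0x5000).
    assert(mapped.iterative_size() == 2);
    return 0;
}
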
@@ -107,9 +107,6 @@ private:
    }

    bool IsComputeMetaClear(const Pipeline* pipeline);

    void AddDmaSyncRanges(const boost::icl::interval_set<VAddr>& ranges);

private:
    friend class VideoCore::BufferCache;
@@ -121,8 +118,6 @@ private:
    AmdGpu::Liverpool* liverpool;
    Core::MemoryManager* memory;
    boost::icl::interval_set<VAddr> mapped_ranges;
    boost::icl::interval_set<VAddr> dma_sync_ranges;
    boost::icl::interval_set<VAddr> dma_sync_mapped_ranges;
    std::shared_mutex mapped_ranges_mutex;
    PipelineCache pipeline_cache;