Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-12-10 05:38:49 +00:00)
Readbacks proof of concept rebased (#3178)
* Readbacks proof of concept
* liverpool: Use span for acb too
* config: Add readbacks config option
* config: Log readbacks
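In short: with the new readbacks option enabled, invalidating a CPU-visible range that the GPU has written first copies the GPU data back into guest memory instead of discarding it, and only then returns the pages to CPU ownership. A minimal sketch of that policy, with placeholder callbacks standing in for the BufferCache and MemoryTracker calls used in the diff below (illustrative only, not the emulator's code):

#include <cstdint>

using VAddr = std::uint64_t;
using u64 = std::uint64_t;

// The callbacks model IsRegionRegistered, IsRegionGpuModified, ReadMemory and
// MarkRegionAsCpuModified from the change below.
void InvalidatePolicy(bool readbacks_enabled, VAddr addr, u64 size, auto&& is_registered,
                      auto&& is_gpu_modified, auto&& read_back, auto&& mark_cpu_modified) {
    if (!is_registered(addr, size)) {
        return; // nothing in the cache covers this range
    }
    if (readbacks_enabled && is_gpu_modified(addr, size)) {
        read_back(addr, size); // copy GPU results back into guest memory first
    }
    mark_cpu_modified(addr, size); // only then hand the pages back to the CPU
}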
@@ -3,12 +3,14 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/scope_exit.h"
#include "common/types.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/buffer_cache/memory_tracker.h"
#include "video_core/host_shaders/fault_buffer_process_comp.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@@ -27,10 +29,10 @@ static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024;

BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
                         TextureCache& texture_cache_, PageManager& tracker_)
    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
      memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
                         PageManager& tracker)
    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
      memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
      staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
      stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
      download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
@@ -38,13 +40,14 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
      gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
      bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
                           0, AllFlags, BDA_PAGETABLE_SIZE},
      fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE),
      memory_tracker{tracker} {
      fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) {
    Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
    Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
                          "BDA Page Table Buffer");
    Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), "Fault Buffer");

    memory_tracker = std::make_unique<MemoryTracker>(tracker);

    // Ensure the first slot is used for the null buffer
    const auto null_id =
        slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
@@ -129,22 +132,27 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s

BufferCache::~BufferCache() = default;

void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
    const bool is_tracked = IsRegionRegistered(device_addr, size);
    if (is_tracked) {
        // Mark the page as CPU modified to stop tracking writes.
        memory_tracker.MarkRegionAsCpuModified(device_addr, size);

        if (unmap) {
            return;
        }
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
    if (!IsRegionRegistered(device_addr, size)) {
        return;
    }
    if (Config::readbacks() && memory_tracker->IsRegionGpuModified(device_addr, size)) {
        ReadMemory(device_addr, size);
    }
    memory_tracker->MarkRegionAsCpuModified(device_addr, size);
}

void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
    liverpool->SendCommand<true>([this, device_addr, size] {
        Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
        DownloadBufferMemory(buffer, device_addr, size);
    });
}

void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
    boost::container::small_vector<vk::BufferCopy, 1> copies;
    u64 total_size_bytes = 0;
    memory_tracker.ForEachDownloadRange<true>(
    memory_tracker->ForEachDownloadRange<false>(
        device_addr, size, [&](u64 device_addr_out, u64 range_size) {
            const VAddr buffer_addr = buffer.CpuAddr();
            const auto add_download = [&](VAddr start, VAddr end) {
@@ -155,7 +163,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
                    .dstOffset = total_size_bytes,
                    .size = new_size,
                });
                total_size_bytes += new_size;
                // Align up to avoid cache conflicts
                constexpr u64 align = 64ULL;
                constexpr u64 mask = ~(align - 1ULL);
                total_size_bytes += (new_size + align - 1) & mask;
            };
            gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
            gpu_modified_ranges.Subtract(device_addr_out, range_size);
@@ -173,11 +184,14 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies);
    scheduler.Finish();
    auto* memory = Core::Memory::Instance();
    for (const auto& copy : copies) {
        const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
        const u64 dst_offset = copy.dstOffset - offset;
        std::memcpy(std::bit_cast<u8*>(copy_device_addr), download + dst_offset, copy.size);
        memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), download + dst_offset,
                                copy.size);
    }
    memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
}
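One detail in DownloadBufferMemory worth spelling out: each download's destination offset is now padded up to a 64-byte boundary so neighbouring copies land on separate CPU cache lines. A self-contained check of that arithmetic (the 64-byte alignment is taken from the code above, the rest is illustrative):

#include <cassert>
#include <cstdint>

// Mirrors `(new_size + align - 1) & ~(align - 1)` from the copy loop above.
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

int main() {
    assert(AlignUp(100, 64) == 128); // a 100-byte range reserves two full cache lines
    assert(AlignUp(64, 64) == 64);   // already-aligned sizes are unchanged
    assert(AlignUp(1, 64) == 64);    // even a single byte gets its own line
    return 0;
}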

void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
@@ -296,9 +310,11 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {

void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
    if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
        memcpy(std::bit_cast<void*>(address), value, num_bytes);
        return;
    if (!is_gds) {
        ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
        if (!IsRegionRegistered(address, num_bytes)) {
            return;
        }
    }
    Buffer* buffer = [&] {
        if (is_gds) {
@@ -326,25 +342,108 @@ void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, boo
    WriteDataBuffer(*buffer, address, value, num_bytes);
}
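InlineData no longer writes through the guest virtual address directly; it asserts that TryWriteBacking succeeds, which, as the name suggests, appears to route the write through a second, always-writable mapping of the same guest memory so it cannot trip the write protection used for tracking. A rough sketch of that dual-view idea (illustrative only, not shadPS4's implementation):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Assumption: guest memory is reachable through two views of the same pages.
// The guest view may be mprotect()ed for dirty tracking, while the backing view
// stays writable, so emulator-side writes never fault.
void WriteThroughBacking(std::uintptr_t guest_addr, std::uintptr_t guest_base,
                         char* backing_base, const void* data, std::size_t size) {
    const std::uintptr_t offset = guest_addr - guest_base; // same offset in both views
    std::memcpy(backing_base + offset, data, size);
}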

void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
            return;
        }
        // Without a readback there's nothing we can do with this
        // Fallback to creating dst buffer on GPU to at least have this data there
    }
    auto& src_buffer = [&] -> const Buffer& {
        if (src_gds) {
            return gds_buffer;
        }
        // Avoid using ObtainBuffer here as that might give us the stream buffer.
        const BufferId buffer_id = FindBuffer(src, num_bytes);
        auto& buffer = slot_buffers[buffer_id];
        SynchronizeBuffer(buffer, src, num_bytes, false);
        return buffer;
    }();
    auto& dst_buffer = [&] -> const Buffer& {
        if (dst_gds) {
            return gds_buffer;
        }
        // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified.
        const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true);
        return *buffer;
    }();
    vk::BufferCopy region{
        .srcOffset = src_buffer.Offset(src),
        .dstOffset = dst_buffer.Offset(dst),
        .size = num_bytes,
    };
    const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
        {
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .buffer = dst_buffer.Handle(),
            .offset = dst_buffer.Offset(dst),
            .size = num_bytes,
        },
        {
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
            .buffer = src_buffer.Handle(),
            .offset = src_buffer.Offset(src),
            .size = num_bytes,
        },
    };
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 2,
        .pBufferMemoryBarriers = buf_barriers_before,
    });
    cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
    const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
        {
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
            .buffer = dst_buffer.Handle(),
            .offset = dst_buffer.Offset(dst),
            .size = num_bytes,
        },
        {
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
            .buffer = src_buffer.Handle(),
            .offset = src_buffer.Offset(src),
            .size = num_bytes,
        },
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 2,
        .pBufferMemoryBarriers = buf_barriers_after,
    });
}

std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                  bool is_texel_buffer, BufferId buffer_id) {
    // For small uniform buffers that have not been modified by gpu
    // use device local stream buffer to reduce renderpass breaks.
    // Maybe we want to modify the threshold now that the page size is 16KB?
    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
    const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
    if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
    // For read-only buffers use device local stream buffer to reduce renderpass breaks.
    if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) {
        const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
        return {&stream_buffer, offset};
    }

    if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
    if (IsBufferInvalid(buffer_id)) {
        buffer_id = FindBuffer(device_addr, size);
    }
    Buffer& buffer = slot_buffers[buffer_id];
    SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
    if (is_written) {
        memory_tracker.MarkRegionAsGpuModified(device_addr, size);
        memory_tracker->MarkRegionAsGpuModified(device_addr, size);
        gpu_modified_ranges.Add(device_addr, size);
    }
    return {&buffer, buffer.Offset(device_addr)};
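The fast path above only applies to small, read-only, GPU-clean ranges; everything else goes through a persistent cached buffer. A toy model of that test (the 16 KiB figure comes from the code comment about CACHING_PAGESIZE; it is an assumption, not a quote of the constant's definition):

#include <cstdint>

constexpr std::uint32_t CachingPageSize = 16 * 1024; // assumed value, see comment above

constexpr bool UseStreamBuffer(bool is_written, std::uint32_t size, bool gpu_dirty) {
    return !is_written && size <= CachingPageSize && !gpu_dirty;
}

static_assert(UseStreamBuffer(false, 256, false));        // small read-only range -> stream buffer
static_assert(!UseStreamBuffer(true, 256, false));        // written by the GPU -> cached buffer
static_assert(!UseStreamBuffer(false, 64 * 1024, false)); // too large for the fast path
static_assert(!UseStreamBuffer(false, 256, true));        // GPU-dirty data must come from the cache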
@@ -352,21 +451,17 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b

std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
    // Check if any buffer contains the full requested range.
    const u64 page = gpu_addr >> CACHING_PAGEBITS;
    const BufferId buffer_id = page_table[page].buffer_id;
    const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id;
    if (buffer_id) {
        Buffer& buffer = slot_buffers[buffer_id];
        if (buffer.IsInBounds(gpu_addr, size)) {
        if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) {
            SynchronizeBuffer(buffer, gpu_addr, size, false);
            return {&buffer, buffer.Offset(gpu_addr)};
        }
    }
    // If no buffer contains the full requested range but some buffer within was GPU-modified,
    // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
    if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
    // If some buffer within was GPU modified create a full buffer to avoid losing GPU data.
    if (IsRegionGpuModified(gpu_addr, size)) {
        return ObtainBuffer(gpu_addr, size, false, false);
    }

    // In all other cases, just do a CPU copy to the staging buffer.
    const auto [data, offset] = staging_buffer.Map(size, 16);
    memory->CopySparseMemory(gpu_addr, data, size);
@@ -380,11 +475,11 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
}

bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
    return memory_tracker.IsRegionCpuModified(addr, size);
    return memory_tracker->IsRegionCpuModified(addr, size);
}

bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
    return memory_tracker.IsRegionGpuModified(addr, size);
    return memory_tracker->IsRegionGpuModified(addr, size);
}

BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
@@ -723,7 +818,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
    boost::container::small_vector<vk::BufferCopy, 4> copies;
    u64 total_size_bytes = 0;
    VAddr buffer_start = buffer.CpuAddr();
    memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
    memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
        copies.push_back(vk::BufferCopy{
            .srcOffset = total_size_bytes,
            .dstOffset = device_addr_out - buffer_start,

@@ -9,7 +9,6 @@
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"

@@ -21,13 +20,6 @@ namespace Core {
class MemoryManager;
}

namespace Shader {
namespace Gcn {
struct FetchShaderData;
}
struct Info;
} // namespace Shader

namespace Vulkan {
class GraphicsPipeline;
}
@@ -39,6 +31,8 @@ using BufferId = Common::SlotId;
static constexpr BufferId NULL_BUFFER_ID{0};

class TextureCache;
class MemoryTracker;
class PageManager;

class BufferCache {
public:
@@ -69,10 +63,16 @@ public:
        bool has_stream_leap = false;
    };

    using IntervalSet =
        boost::icl::interval_set<VAddr, std::less,
                                 ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
                                 RangeSetsAllocator>;
    using IntervalType = typename IntervalSet::interval_type;

public:
    explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool,
                         TextureCache& texture_cache, PageManager& tracker);
                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
                         PageManager& tracker);
    ~BufferCache();

    /// Returns a pointer to GDS device local buffer.
@@ -110,7 +110,10 @@ public:
    }

    /// Invalidates any buffer in the logical page range.
    void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
    void InvalidateMemory(VAddr device_addr, u64 size);

    /// Waits on pending downloads in the logical page range.
    void ReadMemory(VAddr device_addr, u64 size);

    /// Binds host vertex buffers for the current draw.
    void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -124,6 +127,9 @@ public:
    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

    /// Performs buffer to buffer data copy on the GPU.
    void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);

    /// Obtains a buffer for the specified region.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                       bool is_texel_buffer = false,
@@ -166,6 +172,10 @@ private:
        });
    }

    inline bool IsBufferInvalid(BufferId buffer_id) const {
        return !buffer_id || slot_buffers[buffer_id].is_deleted;
    }

    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);

    [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
@@ -193,11 +203,10 @@ private:

    const Vulkan::Instance& instance;
    Vulkan::Scheduler& scheduler;
    Vulkan::Rasterizer& rasterizer;
    AmdGpu::Liverpool* liverpool;
    Core::MemoryManager* memory;
    TextureCache& texture_cache;
    PageManager& tracker;
    std::unique_ptr<MemoryTracker> memory_tracker;
    StreamBuffer staging_buffer;
    StreamBuffer stream_buffer;
    StreamBuffer download_buffer;
@@ -209,7 +218,6 @@ private:
    Common::SlotVector<Buffer> slot_buffers;
    RangeSet gpu_modified_ranges;
    SplitRangeMap<BufferId> buffer_ranges;
    MemoryTracker memory_tracker;
    PageTable page_table;
    vk::UniqueDescriptorSetLayout fault_process_desc_layout;
    vk::UniquePipeline fault_process_pipeline;

@@ -57,6 +57,14 @@ public:
        });
    }

    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
        IteratePages<false>(dirty_cpu_addr, query_size,
                            [](RegionManager* manager, u64 offset, size_t size) {
                                manager->template ChangeRegionState<Type::GPU, false>(
                                    manager->GetCpuAddr() + offset, size);
                            });
    }

    /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
        IteratePages<true>(query_cpu_range, query_size,

@@ -3,7 +3,6 @@

#pragma once

#include <array>
#include "common/bit_array.h"
#include "common/types.h"

@@ -20,9 +19,8 @@ constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PE
enum class Type {
    CPU,
    GPU,
    Writeable,
};

using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;

} // namespace VideoCore
} // namespace VideoCore

@@ -4,7 +4,7 @@
#pragma once

#include <mutex>
#include <utility>
#include "common/config.h"
#include "common/div_ceil.h"

#ifdef __linux__
@@ -20,7 +20,7 @@
namespace VideoCore {

/**
 * Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region.
 * Allows tracking CPU and GPU modification of pages in a contiguous 16MB virtual address region.
 * Information is stored in bitsets for spatial locality and fast update of single pages.
 */
class RegionManager {
@@ -30,6 +30,7 @@ public:
        cpu.Fill();
        gpu.Clear();
        writeable.Fill();
        readable.Fill();
    }
    explicit RegionManager() = default;
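For scale, the tracking granularity implied by the comment above: with a 16 MB region and an assumed 4 KiB tracker page (the actual TRACKER_BYTES_PER_PAGE value is not shown in this diff), each of the four bit arrays covers 4096 pages:

#include <cstdint>

constexpr std::uint64_t RegionSize = 16ull << 20;  // per the updated comment above
constexpr std::uint64_t BytesPerPage = 4ull << 10; // assumption for illustration only
constexpr std::uint64_t PagesPerRegion = RegionSize / BytesPerPage;

static_assert(PagesPerRegion == 4096);
static_assert(PagesPerRegion / 8 == 512); // each RegionBits array is ~512 bytes under these assumptions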

@@ -47,29 +48,19 @@ public:

    template <Type type>
    RegionBits& GetRegionBits() noexcept {
        static_assert(type != Type::Writeable);
        if constexpr (type == Type::CPU) {
            return cpu;
        } else if constexpr (type == Type::GPU) {
            return gpu;
        } else if constexpr (type == Type::Writeable) {
            return writeable;
        } else {
            static_assert(false, "Invalid type");
        }
    }

    template <Type type>
    const RegionBits& GetRegionBits() const noexcept {
        static_assert(type != Type::Writeable);
        if constexpr (type == Type::CPU) {
            return cpu;
        } else if constexpr (type == Type::GPU) {
            return gpu;
        } else if constexpr (type == Type::Writeable) {
            return writeable;
        } else {
            static_assert(false, "Invalid type");
        }
    }

@@ -90,7 +81,6 @@ public:
            return;
        }
        std::scoped_lock lk{lock};
        static_assert(type != Type::Writeable);

        RegionBits& bits = GetRegionBits<type>();
        if constexpr (enable) {
@@ -99,7 +89,9 @@ public:
            bits.UnsetRange(start_page, end_page);
        }
        if constexpr (type == Type::CPU) {
            UpdateProtection<!enable>();
            UpdateProtection<!enable, false>();
        } else if (Config::readbacks()) {
            UpdateProtection<enable, true>();
        }
    }

@@ -122,16 +114,10 @@ public:
            return;
        }
        std::scoped_lock lk{lock};
        static_assert(type != Type::Writeable);

        RegionBits& bits = GetRegionBits<type>();
        RegionBits mask(bits, start_page, end_page);

        // TODO: this will not be needed once we handle readbacks
        if constexpr (type == Type::GPU) {
            mask &= ~writeable;
        }

        for (const auto& [start, end] : mask) {
            func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
        }
@@ -139,7 +125,9 @@ public:
        if constexpr (clear) {
            bits.UnsetRange(start_page, end_page);
            if constexpr (type == Type::CPU) {
                UpdateProtection<true>();
                UpdateProtection<true, false>();
            } else if (Config::readbacks()) {
                UpdateProtection<false, true>();
            }
        }
    }
@@ -151,7 +139,7 @@ public:
     * @param size Size in bytes of the region to query for modifications
     */
    template <Type type>
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) noexcept {
        RENDERER_TRACE;
        const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
        const size_t end_page =
@@ -159,17 +147,10 @@ public:
        if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
            return false;
        }
        // std::scoped_lock lk{lock}; // Is this needed?
        static_assert(type != Type::Writeable);
        std::scoped_lock lk{lock};

        const RegionBits& bits = GetRegionBits<type>();
        RegionBits test(bits, start_page, end_page);

        // TODO: this will not be needed once we handle readbacks
        if constexpr (type == Type::GPU) {
            test &= ~writeable;
        }

        return test.Any();
    }
@@ -181,19 +162,21 @@ private:
     * @param current_bits Current state of the word
     * @param new_bits New state of the word
     *
     * @tparam add_to_tracker True when the tracker should start tracking the new pages
     * @tparam track True when the tracker should start tracking the new pages
     */
    template <bool add_to_tracker>
    template <bool track, bool is_read>
    void UpdateProtection() {
        RENDERER_TRACE;
        RegionBits mask = cpu ^ writeable;

        RegionBits mask = is_read ? (~gpu ^ readable) : (cpu ^ writeable);
        if (mask.None()) {
            return; // No changes to the CPU tracking state
            return;
        }

        writeable = cpu;
        tracker->UpdatePageWatchersForRegion<add_to_tracker>(cpu_addr, mask);
        if constexpr (is_read) {
            readable = ~gpu;
        } else {
            writeable = cpu;
        }
        tracker->UpdatePageWatchersForRegion<track, is_read>(cpu_addr, mask);
    }
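The new is_read flavour of UpdateProtection selects exactly the pages whose desired readability (~gpu) differs from the current readable state, and read-protects the GPU-dirty ones so a CPU access to stale data can be intercepted and serviced by a readback. A small self-contained model of that mask arithmetic, using an 8-page toy region in place of RegionBits:

#include <bitset>
#include <cassert>

int main() {
    std::bitset<8> gpu{0b00001111};      // pages 0-3 were written by the GPU
    std::bitset<8> readable{0b11111111}; // nothing is read-protected yet

    // Same formula as UpdateProtection<track, /*is_read=*/true> above.
    std::bitset<8> mask = ~gpu ^ readable;
    assert(mask == std::bitset<8>{0b00001111}); // exactly the GPU-dirty pages change state

    readable = ~gpu; // after the update, GPU-dirty pages are read-protected
    assert((~gpu ^ readable).none()); // running the update again is a no-op
    return 0;
}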

#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
@@ -206,6 +189,7 @@ private:
    RegionBits cpu;
    RegionBits gpu;
    RegionBits writeable;
    RegionBits readable;
};

} // namespace VideoCore