mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-08 12:49:11 +00:00
buffer_cache: Split DMA fault handling code from buffer cache (#3809)
It's better not to have that raw code there
This commit is contained in:
@@ -960,6 +960,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
|
||||
src/video_core/buffer_cache/buffer.h
|
||||
src/video_core/buffer_cache/buffer_cache.cpp
|
||||
src/video_core/buffer_cache/buffer_cache.h
|
||||
src/video_core/buffer_cache/fault_manager.cpp
|
||||
src/video_core/buffer_cache/fault_manager.h
|
||||
src/video_core/buffer_cache/memory_tracker.h
|
||||
src/video_core/buffer_cache/range_set.h
|
||||
src/video_core/buffer_cache/region_definitions.h
|
||||
|
||||
@@ -190,7 +190,7 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
|
||||
});
|
||||
info.buffers.push_back({
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
|
||||
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
|
||||
.buffer_type = BufferType::FaultBuffer,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
@@ -2,49 +2,42 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
#include "common/alignment.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/types.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/buffer_cache/memory_tracker.h"
|
||||
#include "video_core/host_shaders/fault_buffer_process_comp.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t DataShareBufferSize = 64_KB;
|
||||
static constexpr size_t StagingBufferSize = 512_MB;
|
||||
static constexpr size_t DownloadBufferSize = 32_MB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
static constexpr size_t DownloadBufferSize = 128_MB;
|
||||
static constexpr size_t DeviceBufferSize = 128_MB;
|
||||
static constexpr size_t MaxPageFaults = 1024;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
PageManager& tracker)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
||||
memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
|
||||
fault_manager{instance, scheduler, *this, CACHING_PAGEBITS, CACHING_NUMPAGES},
|
||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||
download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
|
||||
device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize},
|
||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
||||
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
|
||||
0, AllFlags, BDA_PAGETABLE_SIZE},
|
||||
fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) {
|
||||
0, AllFlags, BDA_PAGETABLE_SIZE} {
|
||||
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
|
||||
Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
|
||||
"BDA Page Table Buffer");
|
||||
Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), "Fault Buffer");
|
||||
|
||||
memory_tracker = std::make_unique<MemoryTracker>(tracker);
|
||||
|
||||
@@ -57,80 +50,6 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
|
||||
|
||||
// Prepare the fault buffer parsing pipeline
|
||||
boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
|
||||
{
|
||||
.binding = 0,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
},
|
||||
};
|
||||
|
||||
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
|
||||
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
auto [desc_layout_result, desc_layout] =
|
||||
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
ASSERT_MSG(desc_layout_result == vk::Result::eSuccess,
|
||||
"Failed to create descriptor set layout: {}", vk::to_string(desc_layout_result));
|
||||
fault_process_desc_layout = std::move(desc_layout);
|
||||
|
||||
const auto& module = Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP,
|
||||
vk::ShaderStageFlagBits::eCompute, instance.GetDevice());
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
|
||||
|
||||
const vk::SpecializationMapEntry specialization_map_entry = {
|
||||
.constantID = 0,
|
||||
.offset = 0,
|
||||
.size = sizeof(u32),
|
||||
};
|
||||
|
||||
const vk::SpecializationInfo specialization_info = {
|
||||
.mapEntryCount = 1,
|
||||
.pMapEntries = &specialization_map_entry,
|
||||
.dataSize = sizeof(u32),
|
||||
.pData = &CACHING_PAGEBITS,
|
||||
};
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
.module = module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = &specialization_info,
|
||||
};
|
||||
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 1U,
|
||||
.pSetLayouts = &(*fault_process_desc_layout),
|
||||
};
|
||||
auto [layout_result, layout] = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
ASSERT_MSG(layout_result == vk::Result::eSuccess, "Failed to create pipeline layout: {}",
|
||||
vk::to_string(layout_result));
|
||||
fault_process_pipeline_layout = std::move(layout);
|
||||
|
||||
const vk::ComputePipelineCreateInfo pipeline_info = {
|
||||
.stage = shader_ci,
|
||||
.layout = *fault_process_pipeline_layout,
|
||||
};
|
||||
auto [pipeline_result, pipeline] =
|
||||
instance.GetDevice().createComputePipelineUnique({}, pipeline_info);
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
fault_process_pipeline = std::move(pipeline);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), *fault_process_pipeline,
|
||||
"Fault Buffer Parser Pipeline");
|
||||
|
||||
instance.GetDevice().destroyShaderModule(module);
|
||||
|
||||
// Set up garbage collection parameters
|
||||
if (!instance.CanReportMemoryUsage()) {
|
||||
trigger_gc_memory = DEFAULT_TRIGGER_GC_MEMORY;
|
||||
@@ -656,14 +575,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
|
||||
wanted_size = static_cast<u32>(device_addr_end - device_addr);
|
||||
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
|
||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||
const BufferId new_buffer_id = [&] {
|
||||
std::scoped_lock lk{slot_buffers_mutex};
|
||||
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
|
||||
AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
|
||||
}();
|
||||
const BufferId new_buffer_id =
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
|
||||
AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
|
||||
auto& new_buffer = slot_buffers[new_buffer_id];
|
||||
const size_t size_bytes = new_buffer.SizeBytes();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
for (const BufferId overlap_id : overlap.ids) {
|
||||
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
||||
}
|
||||
@@ -672,126 +587,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
|
||||
}
|
||||
|
||||
void BufferCache::ProcessFaultBuffer() {
|
||||
// Run fault processing shader
|
||||
const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
|
||||
vk::BufferMemoryBarrier2 fault_buffer_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = FAULT_BUFFER_SIZE,
|
||||
};
|
||||
vk::BufferMemoryBarrier2 download_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
|
||||
.buffer = download_buffer.Handle(),
|
||||
.offset = offset,
|
||||
.size = MaxPageFaults * sizeof(u64),
|
||||
};
|
||||
std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_buffer_barrier, download_barrier};
|
||||
vk::DescriptorBufferInfo fault_buffer_info{
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.range = FAULT_BUFFER_SIZE,
|
||||
};
|
||||
vk::DescriptorBufferInfo download_info{
|
||||
.buffer = download_buffer.Handle(),
|
||||
.offset = offset,
|
||||
.range = MaxPageFaults * sizeof(u64),
|
||||
};
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 2> writes{
|
||||
{
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &fault_buffer_info,
|
||||
},
|
||||
{
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &download_info,
|
||||
},
|
||||
};
|
||||
download_buffer.Commit();
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 2,
|
||||
.pBufferMemoryBarriers = barriers.data(),
|
||||
});
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
|
||||
writes);
|
||||
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
|
||||
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
|
||||
cmdbuf.dispatch(num_workgroups, 1, 1);
|
||||
|
||||
// Reset fault buffer
|
||||
const vk::BufferMemoryBarrier2 reset_pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eShaderRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = FAULT_BUFFER_SIZE,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 reset_post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = FAULT_BUFFER_SIZE,
|
||||
};
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &reset_pre_barrier,
|
||||
});
|
||||
cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &reset_post_barrier,
|
||||
});
|
||||
|
||||
// Defer creating buffers
|
||||
scheduler.DeferOperation([this, mapped]() {
|
||||
// Create the fault buffers batched
|
||||
boost::icl::interval_set<VAddr> fault_ranges;
|
||||
const u64* fault_ptr = std::bit_cast<const u64*>(mapped);
|
||||
const u32 fault_count = static_cast<u32>(*(fault_ptr++));
|
||||
for (u32 i = 0; i < fault_count; ++i) {
|
||||
const VAddr fault = *(fault_ptr++);
|
||||
const VAddr fault_end = fault + CACHING_PAGESIZE; // This can be adjusted
|
||||
fault_ranges +=
|
||||
boost::icl::interval_set<VAddr>::interval_type::right_open(fault, fault_end);
|
||||
LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault);
|
||||
}
|
||||
for (const auto& range : fault_ranges) {
|
||||
const VAddr start = range.lower();
|
||||
const VAddr end = range.upper();
|
||||
const u64 page_start = start >> CACHING_PAGEBITS;
|
||||
const u64 page_end = Common::DivCeil(end, CACHING_PAGESIZE);
|
||||
// Buffer size is in 32 bits
|
||||
ASSERT_MSG((range.upper() - range.lower()) <= std::numeric_limits<u32>::max(),
|
||||
"Buffer size is too large");
|
||||
CreateBuffer(start, static_cast<u32>(end - start));
|
||||
}
|
||||
});
|
||||
fault_manager.ProcessFaultBuffer();
|
||||
}
|
||||
|
||||
void BufferCache::Register(BufferId buffer_id) {
|
||||
@@ -972,10 +768,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
}
|
||||
|
||||
void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
|
||||
if (device_addr == 0) {
|
||||
return;
|
||||
}
|
||||
VAddr device_addr_end = device_addr + size;
|
||||
const VAddr device_addr_end = device_addr + size;
|
||||
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||
RENDERER_TRACE;
|
||||
VAddr start = std::max(buffer.CpuAddr(), device_addr);
|
||||
@@ -985,21 +778,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCache::MemoryBarrier() {
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
vk::MemoryBarrier2 barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
};
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.memoryBarrierCount = 1,
|
||||
.pMemoryBarriers = &barrier,
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
|
||||
u32 num_bytes) {
|
||||
scheduler.EndRendering();
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <shared_mutex>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/lru_cache.h"
|
||||
#include "common/slot_vector.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/buffer_cache/buffer.h"
|
||||
#include "video_core/buffer_cache/fault_manager.h"
|
||||
#include "video_core/buffer_cache/range_set.h"
|
||||
#include "video_core/multi_level_page_table.h"
|
||||
|
||||
@@ -40,9 +40,7 @@ public:
|
||||
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
|
||||
static constexpr u64 DEVICE_PAGESIZE = 16_KB;
|
||||
static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS);
|
||||
|
||||
static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress);
|
||||
static constexpr u64 FAULT_BUFFER_SIZE = CACHING_NUMPAGES / 8; // Bit per page
|
||||
|
||||
// Default values for garbage collection
|
||||
static constexpr s64 DEFAULT_TRIGGER_GC_MEMORY = 1_GB;
|
||||
@@ -68,12 +66,6 @@ public:
|
||||
bool has_stream_leap = false;
|
||||
};
|
||||
|
||||
using IntervalSet =
|
||||
boost::icl::interval_set<VAddr, std::less,
|
||||
ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
|
||||
RangeSetsAllocator>;
|
||||
using IntervalType = typename IntervalSet::interval_type;
|
||||
|
||||
public:
|
||||
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
|
||||
@@ -92,7 +84,7 @@ public:
|
||||
|
||||
/// Retrieves the fault buffer.
|
||||
[[nodiscard]] Buffer* GetFaultBuffer() noexcept {
|
||||
return &fault_buffer;
|
||||
return fault_manager.GetFaultBuffer();
|
||||
}
|
||||
|
||||
/// Retrieves the buffer with the specified id.
|
||||
@@ -160,9 +152,6 @@ public:
|
||||
/// Synchronizes all buffers needed for DMA.
|
||||
void SynchronizeDmaBuffers();
|
||||
|
||||
/// Record memory barrier. Used for buffers when accessed via BDA.
|
||||
void MemoryBarrier();
|
||||
|
||||
/// Runs the garbage collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
@@ -217,6 +206,7 @@ private:
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
Core::MemoryManager* memory;
|
||||
TextureCache& texture_cache;
|
||||
FaultManager fault_manager;
|
||||
std::unique_ptr<MemoryTracker> memory_tracker;
|
||||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
@@ -224,8 +214,6 @@ private:
|
||||
StreamBuffer device_buffer;
|
||||
Buffer gds_buffer;
|
||||
Buffer bda_pagetable_buffer;
|
||||
Buffer fault_buffer;
|
||||
std::shared_mutex slot_buffers_mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
u64 total_used_memory = 0;
|
||||
u64 trigger_gc_memory = 0;
|
||||
@@ -235,9 +223,6 @@ private:
|
||||
RangeSet gpu_modified_ranges;
|
||||
SplitRangeMap<BufferId> buffer_ranges;
|
||||
PageTable page_table;
|
||||
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
|
||||
vk::UniquePipeline fault_process_pipeline;
|
||||
vk::UniquePipelineLayout fault_process_pipeline_layout;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
177
src/video_core/buffer_cache/fault_manager.cpp
Normal file
177
src/video_core/buffer_cache/fault_manager.cpp
Normal file
@@ -0,0 +1,177 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/buffer_cache/fault_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
#include "video_core/host_shaders/fault_buffer_process_comp.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t MaxPageFaults = 1024;
|
||||
static constexpr size_t PageFaultAreaSize = MaxPageFaults * sizeof(u64);
|
||||
|
||||
/// Creates the fault buffer, the download buffer that receives the parsed fault list, and
/// the compute pipeline that converts the fault bitmap into that list.
///
/// @param instance           Vulkan instance wrapper; its device is used for all object creation.
/// @param scheduler_         Scheduler, stored by reference.
/// @param buffer_cache_      Owning buffer cache, stored by reference.
/// @param caching_pagebits   log2 of the caching page size; also forwarded to the shader as
///                           the CACHING_PAGEBITS define.
/// @param caching_num_pages_ Number of tracked pages; the fault buffer holds one bit per page.
FaultManager::FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
                           BufferCache& buffer_cache_, u32 caching_pagebits, u64 caching_num_pages_)
    : scheduler{scheduler_}, buffer_cache{buffer_cache_},
      caching_pagesize{1ULL << caching_pagebits}, caching_num_pages{caching_num_pages_},
      fault_buffer_size{caching_num_pages_ / 8},
      fault_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, fault_buffer_size},
      download_buffer{instance, scheduler, MemoryUsage::Download,
                      0, AllFlags, MaxPendingFaults * PageFaultAreaSize} {
    const auto vk_device = instance.GetDevice();
    Vulkan::SetObjectName(vk_device, fault_buffer.Handle(), "Fault Buffer");

    // Both bindings are compute-stage storage buffers and differ only in their slot:
    // slot 0 receives the fault buffer, slot 1 the download area.
    const auto storage_binding = [](u32 slot) {
        return vk::DescriptorSetLayoutBinding{
            .binding = slot,
            .descriptorType = vk::DescriptorType::eStorageBuffer,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
        };
    };
    const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings = {
        storage_binding(0),
        storage_binding(1),
    };
    // Push descriptors are used, so no descriptor pool or set allocation is required.
    const vk::DescriptorSetLayoutCreateInfo set_layout_ci = {
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = static_cast<u32>(layout_bindings.size()),
        .pBindings = layout_bindings.data(),
    };
    fault_process_desc_layout =
        Vulkan::Check(vk_device.createDescriptorSetLayoutUnique(set_layout_ci));

    // The page size and the report capacity reach the shader as preprocessor defines.
    std::vector<std::string> shader_defines{
        fmt::format("CACHING_PAGEBITS={}", caching_pagebits),
        fmt::format("MAX_PAGE_FAULTS={}", MaxPageFaults),
    };
    const auto shader_module =
        Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP, vk::ShaderStageFlagBits::eCompute,
                        vk_device, shader_defines);
    Vulkan::SetObjectName(vk_device, shader_module, "Fault Buffer Parser");

    const vk::PipelineShaderStageCreateInfo stage_ci = {
        .stage = vk::ShaderStageFlagBits::eCompute,
        .module = shader_module,
        .pName = "main",
    };

    const vk::PipelineLayoutCreateInfo pipeline_layout_ci = {
        .setLayoutCount = 1U,
        .pSetLayouts = &(*fault_process_desc_layout),
    };
    fault_process_pipeline_layout =
        Vulkan::Check(vk_device.createPipelineLayoutUnique(pipeline_layout_ci));

    const vk::ComputePipelineCreateInfo compute_ci = {
        .stage = stage_ci,
        .layout = *fault_process_pipeline_layout,
    };
    fault_process_pipeline = Vulkan::Check(vk_device.createComputePipelineUnique({}, compute_ci));
    Vulkan::SetObjectName(vk_device, *fault_process_pipeline, "Fault Buffer Parser Pipeline");

    // The module is only needed while the pipeline is being created.
    vk_device.destroyShaderModule(shader_module);
}
|
||||
|
||||
void FaultManager::ProcessFaultBuffer() {
|
||||
if (u64 wait_tick = fault_areas[current_area]) {
|
||||
scheduler.Wait(wait_tick);
|
||||
scheduler.PopPendingOperations();
|
||||
}
|
||||
|
||||
const u32 offset = current_area * PageFaultAreaSize;
|
||||
u8* mapped = download_buffer.mapped_data.data() + offset;
|
||||
std::memset(mapped, 0, PageFaultAreaSize);
|
||||
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = fault_buffer_size,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = fault_buffer_size,
|
||||
};
|
||||
const vk::DescriptorBufferInfo fault_buffer_info = {
|
||||
.buffer = fault_buffer.Handle(),
|
||||
.offset = 0,
|
||||
.range = fault_buffer_size,
|
||||
};
|
||||
const vk::DescriptorBufferInfo download_info = {
|
||||
.buffer = download_buffer.Handle(),
|
||||
.offset = offset,
|
||||
.range = PageFaultAreaSize,
|
||||
};
|
||||
const std::array<vk::WriteDescriptorSet, 2> writes = {{
|
||||
{
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &fault_buffer_info,
|
||||
},
|
||||
{
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &download_info,
|
||||
},
|
||||
}};
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
|
||||
writes);
|
||||
// 1 bit per page, 32 pages per workgroup
|
||||
const u32 num_threads = caching_num_pages / 32;
|
||||
const u32 num_workgroups = Common::DivCeil(num_threads, 64u);
|
||||
cmdbuf.dispatch(num_workgroups, 1, 1);
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
|
||||
scheduler.DeferOperation([this, mapped, area = current_area] {
|
||||
fault_ranges.Clear();
|
||||
const u64* fault_buf = std::bit_cast<const u64*>(mapped);
|
||||
const u32 fault_count = fault_buf[0];
|
||||
for (u32 i = 1; i <= fault_count; ++i) {
|
||||
fault_ranges.Add(fault_buf[i], caching_pagesize);
|
||||
LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault_buf[i]);
|
||||
}
|
||||
fault_ranges.ForEach([&](VAddr start, VAddr end) {
|
||||
ASSERT_MSG((end - start) <= std::numeric_limits<u32>::max(),
|
||||
"Buffer size is too large");
|
||||
buffer_cache.FindBuffer(start, static_cast<u32>(end - start));
|
||||
});
|
||||
fault_areas[area] = 0;
|
||||
});
|
||||
|
||||
fault_areas[current_area++] = scheduler.CurrentTick();
|
||||
current_area %= MaxPendingFaults;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
42
src/video_core/buffer_cache/fault_manager.h
Normal file
42
src/video_core/buffer_cache/fault_manager.h
Normal file
@@ -0,0 +1,42 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/buffer_cache/buffer.h"
|
||||
#include "video_core/buffer_cache/range_set.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class BufferCache;
|
||||
|
||||
class FaultManager {
|
||||
static constexpr size_t MaxPendingFaults = 8;
|
||||
|
||||
public:
|
||||
explicit FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, u32 caching_pagebits, u64 caching_num_pages);
|
||||
|
||||
[[nodiscard]] Buffer* GetFaultBuffer() noexcept {
|
||||
return &fault_buffer;
|
||||
}
|
||||
|
||||
void ProcessFaultBuffer();
|
||||
|
||||
private:
|
||||
Vulkan::Scheduler& scheduler;
|
||||
BufferCache& buffer_cache;
|
||||
RangeSet fault_ranges;
|
||||
u64 caching_pagesize;
|
||||
u64 caching_num_pages;
|
||||
u64 fault_buffer_size;
|
||||
Buffer fault_buffer;
|
||||
Buffer download_buffer;
|
||||
std::array<u64, MaxPendingFaults> fault_areas{};
|
||||
u32 current_area{};
|
||||
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
|
||||
vk::UniquePipeline fault_process_pipeline;
|
||||
vk::UniquePipelineLayout fault_process_pipeline_layout;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
@@ -13,30 +13,23 @@ layout(std430, binding = 0) buffer input_buf {
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint64_t download_buffer[];
|
||||
};
|
||||
|
||||
// Overlap for 32 bit atomics
|
||||
layout(std430, binding = 1) buffer output_buf32 {
|
||||
uint download_buffer32[];
|
||||
};
|
||||
|
||||
layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
|
||||
|
||||
void main() {
|
||||
uint id = gl_GlobalInvocationID.x;
|
||||
const uint id = gl_GlobalInvocationID.x;
|
||||
uint word = fault_buffer[id];
|
||||
if (word == 0u) {
|
||||
return;
|
||||
}
|
||||
// 1 page per bit
|
||||
uint base_bit = id * 32u;
|
||||
fault_buffer[id] = 0u;
|
||||
const uint base_bit = id * 32u;
|
||||
while (word != 0u) {
|
||||
uint bit = findLSB(word);
|
||||
word &= word - 1;
|
||||
uint page = base_bit + bit;
|
||||
uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u;
|
||||
// It is very unlikely, but should we check for overflow?
|
||||
if (store_index < 1024u) { // only support 1024 page faults
|
||||
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
|
||||
const uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u;
|
||||
if (store_index >= MAX_PAGE_FAULTS) {
|
||||
return;
|
||||
}
|
||||
const uint bit = findLSB(word);
|
||||
word &= word - 1;
|
||||
const uint page = base_bit + bit;
|
||||
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -407,18 +407,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
|
||||
if (uses_dma) {
|
||||
// We only use fault buffer for DMA right now.
|
||||
{
|
||||
Common::RecursiveSharedLock lock{mapped_ranges_mutex};
|
||||
for (auto& range : mapped_ranges) {
|
||||
buffer_cache.SynchronizeBuffersInRange(range.lower(),
|
||||
range.upper() - range.lower());
|
||||
}
|
||||
Common::RecursiveSharedLock lock{mapped_ranges_mutex};
|
||||
for (auto& range : mapped_ranges) {
|
||||
buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
|
||||
}
|
||||
buffer_cache.MemoryBarrier();
|
||||
fault_process_pending = true;
|
||||
}
|
||||
|
||||
fault_process_pending |= uses_dma;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -84,15 +84,6 @@ void Scheduler::Wait(u64 tick) {
|
||||
Flush(info);
|
||||
}
|
||||
master_semaphore.Wait(tick);
|
||||
|
||||
// CAUTION: This can introduce unexpected variation in the wait time.
|
||||
// We don't currently sync the GPU, and some games are very sensitive to this.
|
||||
// If this becomes a problem, it can be commented out.
|
||||
// Idealy we would implement proper gpu sync.
|
||||
while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
|
||||
pending_ops.front().callback();
|
||||
pending_ops.pop();
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::PopPendingOperations() {
|
||||
@@ -109,9 +100,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
};
|
||||
|
||||
current_cmdbuf = command_pool.Commit();
|
||||
auto begin_result = current_cmdbuf.begin(begin_info);
|
||||
ASSERT_MSG(begin_result == vk::Result::eSuccess, "Failed to begin command buffer: {}",
|
||||
vk::to_string(begin_result));
|
||||
Check(current_cmdbuf.begin(begin_info));
|
||||
|
||||
// Invalidate dynamic state so it gets applied to the new command buffer.
|
||||
dynamic_state.Invalidate();
|
||||
@@ -139,9 +128,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
|
||||
#endif
|
||||
|
||||
EndRendering();
|
||||
auto end_result = current_cmdbuf.end();
|
||||
ASSERT_MSG(end_result == vk::Result::eSuccess, "Failed to end command buffer: {}",
|
||||
vk::to_string(end_result));
|
||||
Check(current_cmdbuf.end());
|
||||
|
||||
const vk::Semaphore timeline = master_semaphore.Handle();
|
||||
info.AddSignal(timeline, signal_value);
|
||||
|
||||
Reference in New Issue
Block a user