Process faults on submit

Lander Gallastegi 2025-04-28 20:55:57 +02:00
parent 69a39a3afe
commit c642e4f1be
7 changed files with 73 additions and 75 deletions

src/video_core/amdgpu/liverpool.cpp

@@ -133,6 +133,7 @@ void Liverpool::Process(std::stop_token stoken) {
                 VideoCore::EndCapture();
                 if (rasterizer) {
+                    rasterizer->ProcessFaults();
                     rasterizer->Flush();
                 }
                 submit_done = false;
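This hunk is the heart of the commit: fault processing now happens once per submit, immediately before the flush, instead of once per draw. A minimal sketch of the resulting control flow, with the surrounding loop abbreviated (the real Liverpool::Process handles many more queue events; the submit_done bookkeeping follows the context lines above):

// Sketch only: ProcessFaults() records its compute dispatch and
// fault-buffer reset into the current command buffer, so Flush()
// submits them together with the rest of the batch.
if (submit_done) {
    VideoCore::EndCapture();
    if (rasterizer) {
        rasterizer->ProcessFaults(); // no-op unless a DMA-capable shader ran
        rasterizer->Flush();
    }
    submit_done = false;
}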

src/video_core/buffer_cache/buffer_cache.cpp

@@ -7,7 +7,7 @@
 #include "common/types.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/host_shaders/fault_buffer_parser_comp.h"
+#include "video_core/host_shaders/fault_buffer_process_comp.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -33,14 +33,14 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
       bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
                            0, AllFlags, BDA_PAGETABLE_SIZE},
-      fault_readback_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags,
-                            FAULT_READBACK_SIZE),
+      fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags,
+                   FAULT_READBACK_SIZE),
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
     Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
                           "BDA Page Table Buffer");
-    Vulkan::SetObjectName(instance.GetDevice(), fault_readback_buffer.Handle(),
-                          "Fault Readback Buffer");
+    Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(),
+                          "Fault Buffer");
     // Ensure the first slot is used for the null buffer
     const auto null_id =
@@ -75,10 +75,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
     ASSERT_MSG(desc_layout_result == vk::Result::eSuccess,
                "Failed to create descriptor set layout: {}",
                vk::to_string(desc_layout_result));
-    fault_parse_desc_layout = std::move(desc_layout);
+    fault_process_desc_layout = std::move(desc_layout);
     const auto& module = Vulkan::Compile(
-        HostShaders::FAULT_BUFFER_PARSER_COMP, vk::ShaderStageFlagBits::eCompute,
+        HostShaders::FAULT_BUFFER_PROCESS_COMP, vk::ShaderStageFlagBits::eCompute,
         instance.GetDevice());
     Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
@@ -96,7 +96,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
     const vk::PipelineLayoutCreateInfo layout_info = {
         .setLayoutCount = 1U,
-        .pSetLayouts = &(*fault_parse_desc_layout),
+        .pSetLayouts = &(*fault_process_desc_layout),
         .pushConstantRangeCount = 1,
         .pPushConstantRanges = &push_constants,
     };
@@ -105,19 +105,19 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
     ASSERT_MSG(layout_result == vk::Result::eSuccess,
                "Failed to create pipeline layout: {}",
                vk::to_string(layout_result));
-    fault_parse_pipeline_layout = std::move(layout);
+    fault_process_pipeline_layout = std::move(layout);
     const vk::ComputePipelineCreateInfo pipeline_info = {
         .stage = shader_ci,
-        .layout = *fault_parse_pipeline_layout,
+        .layout = *fault_process_pipeline_layout,
     };
     auto [pipeline_result, pipeline] =
         instance.GetDevice().createComputePipelineUnique({}, pipeline_info);
     ASSERT_MSG(pipeline_result == vk::Result::eSuccess,
                "Failed to create compute pipeline: {}",
                vk::to_string(pipeline_result));
-    fault_parse_pipeline = std::move(pipeline);
-    Vulkan::SetObjectName(instance.GetDevice(), *fault_parse_pipeline, "Fault Buffer Parser Pipeline");
+    fault_process_pipeline = std::move(pipeline);
+    Vulkan::SetObjectName(instance.GetDevice(), *fault_process_pipeline, "Fault Buffer Parser Pipeline");
     instance.GetDevice().destroyShaderModule(module);
 }
@@ -614,14 +614,15 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     return new_buffer_id;
 }

-void BufferCache::CreateFaultBuffers() {
+void BufferCache::ProcessFaultBuffer() {
+    // Run fault processing shader
     const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
     vk::BufferMemoryBarrier2 fault_readback_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
         .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
         .dstAccessMask = vk::AccessFlagBits2::eShaderRead,
-        .buffer = fault_readback_buffer.Handle(),
+        .buffer = fault_buffer.Handle(),
         .offset = 0,
         .size = FAULT_READBACK_SIZE,
     };
@@ -629,14 +630,14 @@ void BufferCache::CreateFaultBuffers() {
         .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
         .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .dstAccessMask = vk::AccessFlagBits2::eShaderRead,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
         .buffer = download_buffer.Handle(),
         .offset = offset,
         .size = MaxPageFaults * sizeof(u64),
     };
     std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_readback_barrier, download_barrier};
     vk::DescriptorBufferInfo fault_readback_info{
-        .buffer = fault_readback_buffer.Handle(),
+        .buffer = fault_buffer.Handle(),
         .offset = 0,
         .range = FAULT_READBACK_SIZE,
     };
@@ -672,12 +673,45 @@ void BufferCache::CreateFaultBuffers() {
         .bufferMemoryBarrierCount = 2,
         .pBufferMemoryBarriers = barriers.data(),
     });
-    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_parse_pipeline);
-    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_parse_pipeline_layout, 0, writes);
-    cmdbuf.pushConstants(*fault_parse_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
+    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
+    cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
     constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
     constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
     cmdbuf.dispatch(num_workgroups, 1, 1);
+    // Reset fault buffer
+    const vk::BufferMemoryBarrier2 reset_pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = fault_buffer.Handle(),
+        .offset = 0,
+        .size = FAULT_READBACK_SIZE,
+    };
+    const vk::BufferMemoryBarrier2 reset_post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = fault_buffer.Handle(),
+        .offset = 0,
+        .size = FAULT_READBACK_SIZE,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &reset_pre_barrier,
+    });
+    cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_READBACK_SIZE, 0);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &reset_post_barrier,
+    });
+    // Defer creating buffers
     scheduler.DeferOperation([this, mapped]() {
         // Create the fault buffers batched
         boost::icl::interval_set<VAddr> fault_ranges;
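The dispatch arithmetic above is worth spelling out: the fault buffer is a bitmap with one bit per tracked page, each invocation scans one 32-bit word (so 32 pages per thread), and the shader's local size is 64. A self-contained sketch of the same math, using a hypothetical page count since the real CACHING_NUMPAGES constant is defined elsewhere in buffer_cache.h (DivCeil mirrors the Common::DivCeil used above):

#include <cstdint>

// Round-up integer division, as Common::DivCeil does.
constexpr uint32_t DivCeil(uint32_t n, uint32_t d) {
    return (n + d - 1) / d;
}

constexpr uint32_t kNumPages = 1u << 26;         // hypothetical CACHING_NUMPAGES
constexpr uint32_t kNumThreads = kNumPages / 32; // one thread scans a 32-page word
constexpr uint32_t kNumWorkgroups = DivCeil(kNumThreads, 64u); // local_size_x = 64

static_assert(kNumWorkgroups * 64u >= kNumThreads, "every bitmap word gets scanned");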
@@ -702,41 +736,6 @@ void BufferCache::CreateFaultBuffers() {
     });
 }

-void BufferCache::ResetFaultReadbackBuffer() {
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = fault_readback_buffer.Handle(),
-        .offset = 0,
-        .size = FAULT_READBACK_SIZE,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-        .buffer = fault_readback_buffer.Handle(),
-        .offset = 0,
-        .size = FAULT_READBACK_SIZE,
-    };
-    // Reset the fault readback buffer
-    scheduler.EndRendering();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.fillBuffer(fault_readback_buffer.buffer, 0, FAULT_READBACK_SIZE, 0);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
-}
-
 void BufferCache::Register(BufferId buffer_id) {
     ChangeRegister<true>(buffer_id);
 }
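For context on the deferred lambda at the end of ProcessFaultBuffer: the compute pass compacts faulting page addresses into the mapped download buffer, and once the GPU work completes, the deferred operation batches them through an interval set so one buffer covers each contiguous faulted range. A rough sketch under stated assumptions (the exact in-buffer format and the buffer-creation call are not visible in this hunk; CoalesceFaults, page_size, and the zero-terminated layout are illustrative, not the repo's API):

#include <boost/icl/interval_set.hpp>
#include <cstddef>
#include <cstdint>

using VAddr = uint64_t;

// Hypothetical shape of the deferred work: walk the downloaded fault
// addresses, merge page-sized intervals, then create one buffer per
// contiguous range instead of one per faulting page.
void CoalesceFaults(const uint64_t* mapped, std::size_t max_faults, uint64_t page_size) {
    boost::icl::interval_set<VAddr> fault_ranges;
    for (std::size_t i = 0; i < max_faults && mapped[i] != 0; ++i) {
        fault_ranges +=
            boost::icl::interval<VAddr>::right_open(mapped[i], mapped[i] + page_size);
    }
    for (const auto& range : fault_ranges) {
        const VAddr addr = range.lower();
        const uint64_t size = range.upper() - range.lower();
        // CreateBuffer(addr, size); // one cache entry per merged range (hypothetical call)
        (void)addr;
        (void)size;
    }
}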

src/video_core/buffer_cache/buffer_cache.h

@@ -84,7 +84,7 @@ public:
     /// Retrieves the fault readback buffer.
     [[nodiscard]] Buffer* GetFaultReadbackBuffer() noexcept {
-        return &fault_readback_buffer;
+        return &fault_buffer;
     }

     /// Retrieves the buffer with the specified id.
@@ -134,11 +134,8 @@ public:
     /// Covers all queued regions.
     void CoverQueuedRegions();

-    /// Creates buffers for "faulted" shader accesses to host memory.
-    void CreateFaultBuffers();
-
-    /// Reset the fault readback buffer.
-    void ResetFaultReadbackBuffer();
+    /// Processes the fault buffer.
+    void ProcessFaultBuffer();

     /// Synchronizes all buffers in the specified range.
     void SynchronizeRange(VAddr device_addr, u64 size);
@@ -202,7 +199,7 @@ private:
     StreamBuffer download_buffer;
     Buffer gds_buffer;
     Buffer bda_pagetable_buffer;
-    Buffer fault_readback_buffer;
+    Buffer fault_buffer;
     boost::icl::interval_set<VAddr> queued_converages;
     boost::icl::interval_set<u64> convered_regions;
     std::shared_mutex covered_regions_mutex;
@@ -211,9 +208,9 @@ private:
     RangeSet gpu_modified_ranges;
     MemoryTracker memory_tracker;
     PageTable page_table;
-    vk::UniqueDescriptorSetLayout fault_parse_desc_layout;
-    vk::UniquePipeline fault_parse_pipeline;
-    vk::UniquePipelineLayout fault_parse_pipeline_layout;
+    vk::UniqueDescriptorSetLayout fault_process_desc_layout;
+    vk::UniquePipeline fault_process_pipeline;
+    vk::UniquePipelineLayout fault_process_pipeline_layout;
 };

 } // namespace VideoCore

src/video_core/host_shaders/CMakeLists.txt

@@ -11,7 +11,7 @@ set(SHADER_FILES
     detilers/micro_32bpp.comp
    detilers/micro_64bpp.comp
     detilers/micro_8bpp.comp
-    fault_buffer_parser.comp
+    fault_buffer_process.comp
     fs_tri.vert
     fsr.comp
     post_process.frag

src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -439,6 +439,13 @@ void Rasterizer::Finish() {
     scheduler.Finish();
 }

+void Rasterizer::ProcessFaults() {
+    if (fault_process_pending) {
+        fault_process_pending = false;
+        buffer_cache.ProcessFaultBuffer();
+    }
+}
+
 bool Rasterizer::BindResources(const Pipeline* pipeline) {
     if (IsComputeMetaClear(pipeline)) {
         return false;
@@ -460,12 +467,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);

-        dma_enabled |= stage->dma_types != Shader::IR::Type::Void;
+        fault_process_pending |= stage->dma_types != Shader::IR::Type::Void;
     }

     pipeline->BindResources(set_writes, buffer_barriers, push_data);

-    if (dma_enabled) {
+    if (fault_process_pending) {
+        // We only use fault buffer for DMA right now.
         // First, import any queued host memory, then sync every mapped
         // region that is cached on GPU memory.
         buffer_cache.CoverQueuedRegions();
@@ -475,7 +483,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
             buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
         }
     }
-    buffer_cache.ResetFaultReadbackBuffer();
     buffer_cache.MemoryBarrier();
 }

src/video_core/renderer_vulkan/vk_rasterizer.h

@@ -65,6 +65,7 @@ public:
     void CpSync();
     u64 Flush();
     void Finish();
+    void ProcessFaults();

     PipelineCache& GetPipelineCache() {
         return pipeline_cache;
@@ -95,13 +96,6 @@ private:
             texture_cache.GetImage(image_id).binding.Reset();
         }
         bound_images.clear();
-
-        if (dma_enabled) {
-            dma_enabled = false;
-            // If a shader accesses a buffer that is not cached, we need to
-            // cache it.
-            buffer_cache.CreateFaultBuffers();
-        }
     }

     bool IsComputeMetaClear(const Pipeline* pipeline);
@@ -135,7 +129,7 @@ private:
     boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
     using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
     boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings;
-    bool dma_enabled{false};
+    bool fault_process_pending{false};
 };

 } // namespace Vulkan
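Taken together, the rasterizer changes turn fault handling from eager to deferred: fault_process_pending is set (possibly many times) in BindResources whenever a bound stage declares DMA types, and it is consumed exactly once per submission in ProcessFaults. A minimal sketch of that producer/consumer lifecycle, with the surrounding classes elided (this is not the real Rasterizer, just the flag's behavior as introduced by this commit):

// Sketch only: shows the once-per-submit consumption of the flag.
struct RasterizerSketch {
    bool fault_process_pending{false};

    // Called per draw/dispatch while binding shader resources.
    void OnStageBound(bool stage_uses_dma) {
        fault_process_pending |= stage_uses_dma;
    }

    // Called once from the submit path, just before flushing the command buffer.
    void ProcessFaults() {
        if (fault_process_pending) {
            fault_process_pending = false;
            // buffer_cache.ProcessFaultBuffer(); // dispatch + reset, then deferred readback
        }
    }
};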