// Patch overview (reviewer preamble placed ahead of the first "diff --git"
// header; it belongs to no hunk).
//
// What the hunks below show:
//   * liverpool.cpp: Liverpool::Process() calls rasterizer->ProcessFaults()
//     immediately before rasterizer->Flush().
//   * buffer_cache.{cpp,h}: the member fault_readback_buffer becomes
//     fault_buffer and the fault_parse_* descriptor layout / pipeline layout /
//     pipeline members become fault_process_*. CreateFaultBuffers() is renamed
//     to ProcessFaultBuffer() and absorbs the removed
//     ResetFaultReadbackBuffer(): after the compute dispatch it records a
//     barrier (compute-shader read -> transfer write), a fillBuffer() that
//     zeroes FAULT_READBACK_SIZE bytes of fault_buffer, and a second barrier
//     (transfer write -> all commands), then defers the buffer creation.
//     The download_buffer barrier additionally grants eShaderWrite.
//   * host_shaders: fault_buffer_parser.comp is renamed (similarity 100%) to
//     fault_buffer_process.comp; the include and the CMake entry follow.
//   * vk_rasterizer.{cpp,h}: adds Rasterizer::ProcessFaults(), which clears
//     fault_process_pending and calls buffer_cache.ProcessFaultBuffer() when
//     the flag is set. The flag replaces dma_enabled; the per-bind call to
//     buffer_cache.ResetFaultReadbackBuffer() and the header-side block that
//     cleared dma_enabled and called CreateFaultBuffers() are removed.
//
// NOTE(review): within these hunks fault_process_pending is only cleared in
//   ProcessFaults(). Once set, the `if (fault_process_pending)` branch in
//   BindResources (CoverQueuedRegions / SynchronizeRange) presumably also
//   runs for later binds whose stages use no DMA -- confirm this is intended.
// NOTE(review): the debug names "Fault Buffer Parser" and "Fault Buffer Parser
//   Pipeline", the accessor GetFaultReadbackBuffer() and the constant
//   FAULT_READBACK_SIZE keep the old naming.
// NOTE(review): the context comment "32 pages per workgroup" sits next to
//   num_threads = CACHING_NUMPAGES / 32; it looks like it means per thread --
//   TODO confirm.
// NOTE(review): the removed ResetFaultReadbackBuffer() called
//   scheduler.EndRendering() before recording; whether ProcessFaultBuffer()
//   does so lies outside the visible hunks -- verify.
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 967b952c6..6e7c335e8 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -133,6 +133,7 @@ void Liverpool::Process(std::stop_token stoken) { VideoCore::EndCapture(); if (rasterizer) { + rasterizer->ProcessFaults(); rasterizer->Flush(); } submit_done = false; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 06d384c6a..3e906745b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -7,7 +7,7 @@ #include "common/types.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/host_shaders/fault_buffer_parser_comp.h" +#include "video_core/host_shaders/fault_buffer_process_comp.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -33,14 +33,14 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize}, bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE}, - fault_readback_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, + fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_READBACK_SIZE), memory_tracker{&tracker} { Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(), "BDA Page Table Buffer"); - Vulkan::SetObjectName(instance.GetDevice(), fault_readback_buffer.Handle(), - "Fault Readback Buffer"); + Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), + "Fault Buffer"); // Ensure the first slot is used for the null buffer const auto 
null_id = @@ -75,10 +75,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s ASSERT_MSG(desc_layout_result == vk::Result::eSuccess, "Failed to create descriptor set layout: {}", vk::to_string(desc_layout_result)); - fault_parse_desc_layout = std::move(desc_layout); + fault_process_desc_layout = std::move(desc_layout); const auto& module = Vulkan::Compile( - HostShaders::FAULT_BUFFER_PARSER_COMP, vk::ShaderStageFlagBits::eCompute, + HostShaders::FAULT_BUFFER_PROCESS_COMP, vk::ShaderStageFlagBits::eCompute, instance.GetDevice()); Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser"); @@ -96,7 +96,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, - .pSetLayouts = &(*fault_parse_desc_layout), + .pSetLayouts = &(*fault_process_desc_layout), .pushConstantRangeCount = 1, .pPushConstantRanges = &push_constants, }; @@ -105,19 +105,19 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s ASSERT_MSG(layout_result == vk::Result::eSuccess, "Failed to create pipeline layout: {}", vk::to_string(layout_result)); - fault_parse_pipeline_layout = std::move(layout); + fault_process_pipeline_layout = std::move(layout); const vk::ComputePipelineCreateInfo pipeline_info = { .stage = shader_ci, - .layout = *fault_parse_pipeline_layout, + .layout = *fault_process_pipeline_layout, }; auto [pipeline_result, pipeline] = instance.GetDevice().createComputePipelineUnique({}, pipeline_info); ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}", vk::to_string(pipeline_result)); - fault_parse_pipeline = std::move(pipeline); - Vulkan::SetObjectName(instance.GetDevice(), *fault_parse_pipeline, "Fault Buffer Parser Pipeline"); + fault_process_pipeline = std::move(pipeline); + Vulkan::SetObjectName(instance.GetDevice(), *fault_process_pipeline, "Fault Buffer Parser 
Pipeline"); instance.GetDevice().destroyShaderModule(module); } @@ -614,14 +614,15 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { return new_buffer_id; } -void BufferCache::CreateFaultBuffers() { +void BufferCache::ProcessFaultBuffer() { + // Run fault processing shader const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64)); vk::BufferMemoryBarrier2 fault_readback_barrier{ .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, .srcAccessMask = vk::AccessFlagBits2::eShaderWrite, .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, .dstAccessMask = vk::AccessFlagBits2::eShaderRead, - .buffer = fault_readback_buffer.Handle(), + .buffer = fault_buffer.Handle(), .offset = 0, .size = FAULT_READBACK_SIZE, }; @@ -629,14 +630,14 @@ void BufferCache::CreateFaultBuffers() { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, .buffer = download_buffer.Handle(), .offset = offset, .size = MaxPageFaults * sizeof(u64), }; std::array barriers{fault_readback_barrier, download_barrier}; vk::DescriptorBufferInfo fault_readback_info{ - .buffer = fault_readback_buffer.Handle(), + .buffer = fault_buffer.Handle(), .offset = 0, .range = FAULT_READBACK_SIZE, }; @@ -672,12 +673,45 @@ void BufferCache::CreateFaultBuffers() { .bufferMemoryBarrierCount = 2, .pBufferMemoryBarriers = barriers.data(), }); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_parse_pipeline); - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_parse_pipeline_layout, 0, writes); - cmdbuf.pushConstants(*fault_parse_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, 
*fault_process_pipeline); + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes); + cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS); constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u); cmdbuf.dispatch(num_workgroups, 1, 1); + + // Reset fault buffer + const vk::BufferMemoryBarrier2 reset_pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eShaderRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = fault_buffer.Handle(), + .offset = 0, + .size = FAULT_READBACK_SIZE, + }; + const vk::BufferMemoryBarrier2 reset_post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = fault_buffer.Handle(), + .offset = 0, + .size = FAULT_READBACK_SIZE, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &reset_pre_barrier, + }); + cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_READBACK_SIZE, 0); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &reset_post_barrier, + }); + + // Defer creating buffers scheduler.DeferOperation([this, mapped]() { // Create the fault buffers batched boost::icl::interval_set fault_ranges; @@ -702,41 +736,6 @@ void BufferCache::CreateFaultBuffers() { }); } -void BufferCache::ResetFaultReadbackBuffer() { - const vk::BufferMemoryBarrier2 
pre_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = fault_readback_buffer.Handle(), - .offset = 0, - .size = FAULT_READBACK_SIZE, - }; - const vk::BufferMemoryBarrier2 post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, - .buffer = fault_readback_buffer.Handle(), - .offset = 0, - .size = FAULT_READBACK_SIZE, - }; - // Reset the fault readback buffer - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &pre_barrier, - }); - cmdbuf.fillBuffer(fault_readback_buffer.buffer, 0, FAULT_READBACK_SIZE, 0); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &post_barrier, - }); -} - void BufferCache::Register(BufferId buffer_id) { ChangeRegister(buffer_id); } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 038128745..3daa27ef2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -84,7 +84,7 @@ public: /// Retrieves the fault readback buffer. [[nodiscard]] Buffer* GetFaultReadbackBuffer() noexcept { - return &fault_readback_buffer; + return &fault_buffer; } /// Retrieves the buffer with the specified id. @@ -134,11 +134,8 @@ public: /// Covers all queued regions. 
void CoverQueuedRegions(); - /// Creates buffers for "faulted" shader accesses to host memory. - void CreateFaultBuffers(); - - /// Reset the fault readback buffer. - void ResetFaultReadbackBuffer(); + /// Processes the fault buffer. + void ProcessFaultBuffer(); /// Synchronizes all buffers in the specified range. void SynchronizeRange(VAddr device_addr, u64 size); @@ -202,7 +199,7 @@ private: StreamBuffer download_buffer; Buffer gds_buffer; Buffer bda_pagetable_buffer; - Buffer fault_readback_buffer; + Buffer fault_buffer; boost::icl::interval_set queued_converages; boost::icl::interval_set convered_regions; std::shared_mutex covered_regions_mutex; @@ -211,9 +208,9 @@ private: RangeSet gpu_modified_ranges; MemoryTracker memory_tracker; PageTable page_table; - vk::UniqueDescriptorSetLayout fault_parse_desc_layout; - vk::UniquePipeline fault_parse_pipeline; - vk::UniquePipelineLayout fault_parse_pipeline_layout; + vk::UniqueDescriptorSetLayout fault_process_desc_layout; + vk::UniquePipeline fault_process_pipeline; + vk::UniquePipelineLayout fault_process_pipeline_layout; }; } // namespace VideoCore diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c439c860c..d52afe738 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,7 +11,7 @@ set(SHADER_FILES detilers/micro_32bpp.comp detilers/micro_64bpp.comp detilers/micro_8bpp.comp - fault_buffer_parser.comp + fault_buffer_process.comp fs_tri.vert fsr.comp post_process.frag diff --git a/src/video_core/host_shaders/fault_buffer_parser.comp b/src/video_core/host_shaders/fault_buffer_process.comp similarity index 100% rename from src/video_core/host_shaders/fault_buffer_parser.comp rename to src/video_core/host_shaders/fault_buffer_process.comp diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3d79f4669..69f7f6f74 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -439,6 +439,13 @@ void Rasterizer::Finish() { scheduler.Finish(); } +void Rasterizer::ProcessFaults() { + if (fault_process_pending) { + fault_process_pending = false; + buffer_cache.ProcessFaultBuffer(); + } +} + bool Rasterizer::BindResources(const Pipeline* pipeline) { if (IsComputeMetaClear(pipeline)) { return false; @@ -460,12 +467,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { BindBuffers(*stage, binding, push_data); BindTextures(*stage, binding); - dma_enabled |= stage->dma_types != Shader::IR::Type::Void; + fault_process_pending |= stage->dma_types != Shader::IR::Type::Void; } pipeline->BindResources(set_writes, buffer_barriers, push_data); - if (dma_enabled) { + if (fault_process_pending) { + // We only use fault buffer for DMA right now. // First, import any queued host memory, then sync every mapped // region that is cached on GPU memory. buffer_cache.CoverQueuedRegions(); @@ -475,7 +483,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower()); } } - buffer_cache.ResetFaultReadbackBuffer(); buffer_cache.MemoryBarrier(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 91d34ad4b..2e6de69f0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -65,6 +65,7 @@ public: void CpSync(); u64 Flush(); void Finish(); + void ProcessFaults(); PipelineCache& GetPipelineCache() { return pipeline_cache; @@ -95,13 +96,6 @@ private: texture_cache.GetImage(image_id).binding.Reset(); } bound_images.clear(); - - if (dma_enabled) { - dma_enabled = false; - // If a shader accesses a buffer that is not cached, we need to - // cache it. 
- buffer_cache.CreateFaultBuffers(); - } } bool IsComputeMetaClear(const Pipeline* pipeline); @@ -135,7 +129,7 @@ private: boost::container::static_vector buffer_bindings; using ImageBindingInfo = std::pair; boost::container::static_vector image_bindings; - bool dma_enabled{false}; + bool fault_process_pending{false}; }; } // namespace Vulkan