diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index a8ffe6ae5..6b12cb2d9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1105,30 +1105,30 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie } Id EmitContext::DefineGetBdaPointer() { - const auto caching_pagebits{ + const Id caching_pagebits{ Constant(U64, static_cast(VideoCore::BufferCache::CACHING_PAGEBITS))}; - const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)}; + const Id caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)}; - const auto func_type{TypeFunction(U64, U64)}; - const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)}; - const auto address{OpFunctionParameter(U64)}; + const Id func_type{TypeFunction(U64, U64)}; + const Id func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)}; + const Id address{OpFunctionParameter(U64)}; Name(func, "get_bda_pointer"); AddLabel(); - const auto fault_label{OpLabel()}; - const auto available_label{OpLabel()}; - const auto merge_label{OpLabel()}; + const Id fault_label{OpLabel()}; + const Id available_label{OpLabel()}; + const Id merge_label{OpLabel()}; // Get page BDA - const auto page{OpShiftRightLogical(U64, address, caching_pagebits)}; - const auto page32{OpUConvert(U32[1], page)}; const auto& bda_buffer{buffers[bda_pagetable_index]}; const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64); - const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)}; - const auto bda{OpLoad(U64, bda_ptr)}; + const Id page{OpShiftRightLogical(U64, address, caching_pagebits)}; + const Id page32{OpUConvert(U32[1], page)}; + const Id bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)}; + const Id bda{OpLoad(U64, bda_ptr)}; // Check if page is GPU cached - const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)}; + const Id is_fault{OpIEqual(U1[1], bda, u64_zero_value)}; OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); OpBranchConditional(is_fault, fault_label, available_label); @@ -1136,28 +1136,26 @@ Id EmitContext::DefineGetBdaPointer() { AddLabel(fault_label); const auto& fault_buffer{buffers[fault_buffer_index]}; const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32); - const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))}; - const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))}; - const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)}; - const auto fault_ptr{ + const Id page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))}; + const Id page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))}; + const Id page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)}; + const Id fault_ptr{ OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)}; - const auto fault_value{OpLoad(U32[1], fault_ptr)}; - const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)}; - OpStore(fault_ptr, fault_value_masked); + OpAtomicOr(U32[1], fault_ptr, ConstU32(u32(spv::Scope::Device)), u32_zero_value, page_mask); // Return null pointer - const auto fallback_result{u64_zero_value}; + const Id fallback_result{u64_zero_value}; OpBranch(merge_label); // Value is available, compute address AddLabel(available_label); - const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)}; - const auto addr{OpIAdd(U64, bda, offset_in_bda)}; + const Id offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)}; + const Id addr{OpIAdd(U64, bda, offset_in_bda)}; OpBranch(merge_label); // Merge AddLabel(merge_label); - const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)}; + const Id result{OpPhi(U64, addr, available_label, fallback_result, fault_label)}; OpReturnValue(result); OpFunctionEnd(); return func; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c5e5d18f8..42c575610 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -657,10 +657,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { } WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress)); - const size_t size_bytes = new_buffer.SizeBytes(); - const auto cmdbuf = scheduler.CommandBuffer(); - scheduler.EndRendering(); - cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } @@ -670,8 +666,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { void BufferCache::ProcessFaultBuffer() { // Run fault processing shader - const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64)); - vk::BufferMemoryBarrier2 fault_buffer_barrier{ + static constexpr size_t StagingSize = MaxPageFaults * sizeof(u64); + const auto [mapped, offset] = download_buffer.Map(StagingSize); + std::memset(mapped, 0, StagingSize); + const vk::BufferMemoryBarrier2 fault_buffer_pre_barrier{ .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, .srcAccessMask = vk::AccessFlagBits2::eShaderWrite, .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, @@ -680,27 +678,17 @@ void BufferCache::ProcessFaultBuffer() { .offset = 0, .size = FAULT_BUFFER_SIZE, }; - vk::BufferMemoryBarrier2 download_barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, - .buffer = download_buffer.Handle(), - .offset = offset, - .size = MaxPageFaults * sizeof(u64), - }; - std::array barriers{fault_buffer_barrier, download_barrier}; - vk::DescriptorBufferInfo fault_buffer_info{ + const vk::DescriptorBufferInfo fault_buffer_info{ .buffer = fault_buffer.Handle(), .offset = 0, .range = FAULT_BUFFER_SIZE, }; - vk::DescriptorBufferInfo download_info{ + const vk::DescriptorBufferInfo download_info{ .buffer = download_buffer.Handle(), .offset = offset, - .range = MaxPageFaults * sizeof(u64), + .range = StagingSize, }; - boost::container::small_vector writes{ + const std::array writes{{ { .dstSet = VK_NULL_HANDLE, .dstBinding = 0, @@ -717,15 +705,14 @@ void BufferCache::ProcessFaultBuffer() { .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &download_info, }, - }; + }}; download_buffer.Commit(); scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0); cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = barriers.data(), + .bufferMemoryBarrierCount = 1U, + .pBufferMemoryBarriers = &fault_buffer_pre_barrier, }); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, @@ -735,34 +722,19 @@ void BufferCache::ProcessFaultBuffer() { cmdbuf.dispatch(num_workgroups, 1, 1); // Reset fault buffer - const vk::BufferMemoryBarrier2 reset_pre_barrier = { + const vk::BufferMemoryBarrier2 fault_buffer_post_barrier{ .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .srcAccessMask = vk::AccessFlagBits2::eShaderRead, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = fault_buffer.Handle(), - .offset = 0, - .size = FAULT_BUFFER_SIZE, - }; - const vk::BufferMemoryBarrier2 reset_post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderWrite, .buffer = fault_buffer.Handle(), .offset = 0, .size = FAULT_BUFFER_SIZE, }; cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &reset_pre_barrier, - }); - cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &reset_post_barrier, + .bufferMemoryBarrierCount = 1U, + .pBufferMemoryBarriers = &fault_buffer_post_barrier, }); // Defer creating buffers @@ -1036,25 +1008,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { }); } -void BufferCache::MemoryBarrier() { - // Vulkan doesn't know which buffer we access in a shader if we use - // BufferDeviceAddress. We need a full memory barrier. - // For now, we only read memory using BDA. If we want to write to it, - // we might need to change this. - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - vk::MemoryBarrier2 barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .memoryBarrierCount = 1, - .pMemoryBarriers = &barrier, - }); -} - void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) { scheduler.EndRendering(); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b509ce2d0..b552d0b1f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -159,9 +159,6 @@ public: /// Synchronizes all buffers neede for DMA. void SynchronizeDmaBuffers(); - /// Record memory barrier. Used for buffers when accessed via BDA. - void MemoryBarrier(); - private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { diff --git a/src/video_core/host_shaders/fault_buffer_process.comp b/src/video_core/host_shaders/fault_buffer_process.comp index a712cf441..173896329 100644 --- a/src/video_core/host_shaders/fault_buffer_process.comp +++ b/src/video_core/host_shaders/fault_buffer_process.comp @@ -24,9 +24,6 @@ layout(constant_id = 0) const uint CACHING_PAGEBITS = 0; void main() { uint id = gl_GlobalInvocationID.x; uint word = fault_buffer[id]; - if (word == 0u) { - return; - } // 1 page per bit uint base_bit = id * 32u; while (word != 0u) { @@ -39,4 +36,5 @@ void main() { download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS; } } + fault_buffer[id] = 0u; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c3e221739..a2f8541f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -488,7 +488,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { range.upper() - range.lower()); } } - buffer_cache.MemoryBarrier(); } fault_process_pending |= uses_dma;