buffer_cache: Remove useless fillBuffer, remove some barriers on fault buffer process

commit 34ce2723f8
parent 6cbb304afc
Author: IndecisiveTurtle
Date:   2025-07-17 16:02:22 +03:00

2 changed files with 18 additions and 48 deletions


@@ -657,10 +657,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
-    const size_t size_bytes = new_buffer.SizeBytes();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -670,8 +666,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
 
 void BufferCache::ProcessFaultBuffer() {
     // Run fault processing shader
-    const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
-    vk::BufferMemoryBarrier2 fault_buffer_barrier{
+    static constexpr size_t StagingSize = MaxPageFaults * sizeof(u64);
+    const auto [mapped, offset] = download_buffer.Map(StagingSize);
+    std::memset(mapped, 0, StagingSize);
+    const vk::BufferMemoryBarrier2 fault_buffer_pre_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
         .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
@@ -680,27 +678,17 @@ void BufferCache::ProcessFaultBuffer() {
         .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
-    vk::BufferMemoryBarrier2 download_barrier{
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
-        .buffer = download_buffer.Handle(),
-        .offset = offset,
-        .size = MaxPageFaults * sizeof(u64),
-    };
-    std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_buffer_barrier, download_barrier};
-    vk::DescriptorBufferInfo fault_buffer_info{
+    const vk::DescriptorBufferInfo fault_buffer_info{
         .buffer = fault_buffer.Handle(),
         .offset = 0,
         .range = FAULT_BUFFER_SIZE,
     };
-    vk::DescriptorBufferInfo download_info{
+    const vk::DescriptorBufferInfo download_info{
         .buffer = download_buffer.Handle(),
         .offset = offset,
-        .range = MaxPageFaults * sizeof(u64),
+        .range = StagingSize,
     };
-    boost::container::small_vector<vk::WriteDescriptorSet, 2> writes{
+    const std::array<vk::WriteDescriptorSet, 2> writes{{
         {
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = 0,
@@ -717,15 +705,14 @@ void BufferCache::ProcessFaultBuffer() {
             .descriptorType = vk::DescriptorType::eStorageBuffer,
             .pBufferInfo = &download_info,
         },
-    };
+    }};
     download_buffer.Commit();
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0);
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 2,
-        .pBufferMemoryBarriers = barriers.data(),
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_pre_barrier,
     });
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
     cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
@@ -735,34 +722,19 @@ void BufferCache::ProcessFaultBuffer() {
     cmdbuf.dispatch(num_workgroups, 1, 1);
     // Reset fault buffer
-    const vk::BufferMemoryBarrier2 reset_pre_barrier = {
+    const vk::BufferMemoryBarrier2 fault_buffer_post_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .srcAccessMask = vk::AccessFlagBits2::eShaderRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = fault_buffer.Handle(),
-        .offset = 0,
-        .size = FAULT_BUFFER_SIZE,
-    };
-    const vk::BufferMemoryBarrier2 reset_post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .buffer = fault_buffer.Handle(),
         .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_pre_barrier,
-    });
-    cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_post_barrier,
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_post_barrier,
     });
     // Defer creating buffers
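Note on the host-side part of the change: the download staging slice is now zeroed on the CPU through the mapped pointer (std::memset after download_buffer.Map) instead of with a cmdbuf.fillBuffer() recorded on the GPU, which is what allows the transfer-to-compute download_barrier to be dropped. A minimal sketch of that idea, using a hypothetical MappedSlice type standing in for whatever Map() actually returns (not repository code):

// Hypothetical illustration: clearing the staging slice on the host removes the need for
// a fillBuffer() transfer command and the barrier that ordered it against the dispatch.
#include <cstddef>
#include <cstdint>
#include <cstring>

struct MappedSlice {
    void* host_ptr;       // CPU-visible pointer into the host-mapped staging allocation
    std::uint64_t offset; // offset of the slice within the GPU-side buffer
};

// The clear happens on the host before the command buffer executes, so the GPU never
// performs a transfer write to this range and no extra synchronization is recorded.
inline void ClearStagingSlice(const MappedSlice& slice, std::size_t size_bytes) {
    std::memset(slice.host_ptr, 0, size_bytes);
}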


@@ -24,9 +24,6 @@ layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
 void main() {
     uint id = gl_GlobalInvocationID.x;
     uint word = fault_buffer[id];
-    if (word == 0u) {
-        return;
-    }
     // 1 page per bit
     uint base_bit = id * 32u;
     while (word != 0u) {
@@ -39,4 +36,5 @@ void main() {
             download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
         }
     }
+    fault_buffer[id] = 0u;
 }
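Since each shader invocation now clears its own fault word after scanning it (fault_buffer[id] = 0u;), the host no longer needs to record a fillBuffer() reset of the fault buffer, nor the two transfer-stage barriers that bracketed it. The standalone C++ sketch below mirrors the shader's per-word loop for illustration only; the atomic append into the download buffer is simplified to a plain vector push, and the function name and types are hypothetical, not part of the codebase.

// Standalone analogue of the fault-processing loop: each set bit in a 32-bit fault word
// marks one faulted page, and the word is cleared in place afterwards (as the shader now does).
#include <bit>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint64_t> ExtractFaultedPages(std::vector<std::uint32_t>& fault_words,
                                               std::uint32_t caching_pagebits) {
    std::vector<std::uint64_t> addresses;
    for (std::size_t id = 0; id < fault_words.size(); ++id) {
        std::uint32_t word = fault_words[id];
        const std::uint64_t base_bit = std::uint64_t(id) * 32u; // 1 page per bit
        while (word != 0u) {
            const std::uint32_t bit = std::countr_zero(word); // lowest set fault bit
            word &= word - 1u;                                // clear that bit locally
            const std::uint64_t page = base_bit + bit;
            addresses.push_back(page << caching_pagebits);    // page index -> device address
        }
        fault_words[id] = 0u; // in-place reset, replacing the removed host-side fillBuffer
    }
    return addresses;
}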