TheTurtle 2025-07-20 19:56:46 +00:00 committed by GitHub
commit 17c3902fcf
5 changed files with 40 additions and 95 deletions

View File

@@ -1105,30 +1105,30 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view
 }
 
 Id EmitContext::DefineGetBdaPointer() {
-    const auto caching_pagebits{
+    const Id caching_pagebits{
         Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS))};
-    const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
-    const auto func_type{TypeFunction(U64, U64)};
-    const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
-    const auto address{OpFunctionParameter(U64)};
+    const Id caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
+    const Id func_type{TypeFunction(U64, U64)};
+    const Id func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
+    const Id address{OpFunctionParameter(U64)};
     Name(func, "get_bda_pointer");
     AddLabel();
-    const auto fault_label{OpLabel()};
-    const auto available_label{OpLabel()};
-    const auto merge_label{OpLabel()};
+    const Id fault_label{OpLabel()};
+    const Id available_label{OpLabel()};
+    const Id merge_label{OpLabel()};
 
     // Get page BDA
-    const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
-    const auto page32{OpUConvert(U32[1], page)};
+    const Id page{OpShiftRightLogical(U64, address, caching_pagebits)};
+    const Id page32{OpUConvert(U32[1], page)};
     const auto& bda_buffer{buffers[bda_pagetable_index]};
     const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
-    const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
-    const auto bda{OpLoad(U64, bda_ptr)};
+    const Id bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
+    const Id bda{OpLoad(U64, bda_ptr)};
 
     // Check if page is GPU cached
-    const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
+    const Id is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
     OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
     OpBranchConditional(is_fault, fault_label, available_label);
@@ -1136,28 +1136,26 @@ Id EmitContext::DefineGetBdaPointer() {
     AddLabel(fault_label);
     const auto& fault_buffer{buffers[fault_buffer_index]};
     const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32);
-    const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
-    const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
-    const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
-    const auto fault_ptr{
+    const Id page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
+    const Id page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
+    const Id page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
+    const Id fault_ptr{
         OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)};
-    const auto fault_value{OpLoad(U32[1], fault_ptr)};
-    const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)};
-    OpStore(fault_ptr, fault_value_masked);
+    OpAtomicOr(U32[1], fault_ptr, ConstU32(u32(spv::Scope::Device)), u32_zero_value, page_mask);
     // Return null pointer
-    const auto fallback_result{u64_zero_value};
+    const Id fallback_result{u64_zero_value};
     OpBranch(merge_label);
 
     // Value is available, compute address
     AddLabel(available_label);
-    const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
-    const auto addr{OpIAdd(U64, bda, offset_in_bda)};
+    const Id offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
+    const Id addr{OpIAdd(U64, bda, offset_in_bda)};
     OpBranch(merge_label);
 
     // Merge
     AddLabel(merge_label);
-    const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
+    const Id result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
     OpReturnValue(result);
     OpFunctionEnd();
     return func;
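Two changes land in this emitter: SPIR-V results are now declared with the explicit Id handle type instead of auto, and marking a faulted page switches from a load/OR/store sequence to a single OpAtomicOr at device scope (u32_zero_value supplies relaxed memory semantics). The atomic matters because many invocations can fault on pages that share one 32-bit bitmap word, and a non-atomic read-modify-write lets concurrent markers overwrite each other's bits. A minimal host-side analogy of the two patterns, with hypothetical names, in C++ for illustration:

#include <atomic>
#include <cstdint>

// Hypothetical fault bitmap: one bit per page, 32 pages per word,
// mirroring the layout the emitted shader code indexes.
std::atomic<uint32_t> fault_words[1024];

void MarkFaultPageRacy(uint32_t page) {
    // Equivalent of the removed OpLoad/OpBitwiseOr/OpStore: two threads
    // targeting the same word can lose each other's update.
    uint32_t word = fault_words[page / 32].load(std::memory_order_relaxed);
    word |= 1u << (page % 32);
    fault_words[page / 32].store(word, std::memory_order_relaxed);
}

void MarkFaultPage(uint32_t page) {
    // Equivalent of the new OpAtomicOr: the OR is one indivisible
    // read-modify-write, so no set bit can be dropped.
    fault_words[page / 32].fetch_or(1u << (page % 32), std::memory_order_relaxed);
}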

View File

@@ -657,10 +657,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
-    const size_t size_bytes = new_buffer.SizeBytes();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -670,8 +666,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
 
 void BufferCache::ProcessFaultBuffer() {
     // Run fault processing shader
-    const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
-    vk::BufferMemoryBarrier2 fault_buffer_barrier{
+    static constexpr size_t StagingSize = MaxPageFaults * sizeof(u64);
+    const auto [mapped, offset] = download_buffer.Map(StagingSize);
+    std::memset(mapped, 0, StagingSize);
+    const vk::BufferMemoryBarrier2 fault_buffer_pre_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
         .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
@@ -680,27 +678,17 @@ void BufferCache::ProcessFaultBuffer() {
         .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
-    vk::BufferMemoryBarrier2 download_barrier{
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
-        .buffer = download_buffer.Handle(),
-        .offset = offset,
-        .size = MaxPageFaults * sizeof(u64),
-    };
-    std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_buffer_barrier, download_barrier};
-    vk::DescriptorBufferInfo fault_buffer_info{
+    const vk::DescriptorBufferInfo fault_buffer_info{
         .buffer = fault_buffer.Handle(),
         .offset = 0,
         .range = FAULT_BUFFER_SIZE,
     };
-    vk::DescriptorBufferInfo download_info{
+    const vk::DescriptorBufferInfo download_info{
         .buffer = download_buffer.Handle(),
         .offset = offset,
-        .range = MaxPageFaults * sizeof(u64),
+        .range = StagingSize,
     };
-    boost::container::small_vector<vk::WriteDescriptorSet, 2> writes{
+    const std::array<vk::WriteDescriptorSet, 2> writes{{
         {
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = 0,
@@ -717,15 +705,14 @@ void BufferCache::ProcessFaultBuffer() {
             .descriptorType = vk::DescriptorType::eStorageBuffer,
             .pBufferInfo = &download_info,
         },
-    };
+    }};
     download_buffer.Commit();
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0);
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 2,
-        .pBufferMemoryBarriers = barriers.data(),
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_pre_barrier,
     });
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
     cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
@@ -735,34 +722,19 @@ void BufferCache::ProcessFaultBuffer() {
     cmdbuf.dispatch(num_workgroups, 1, 1);
 
     // Reset fault buffer
-    const vk::BufferMemoryBarrier2 reset_pre_barrier = {
+    const vk::BufferMemoryBarrier2 fault_buffer_post_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .srcAccessMask = vk::AccessFlagBits2::eShaderRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = fault_buffer.Handle(),
-        .offset = 0,
-        .size = FAULT_BUFFER_SIZE,
-    };
-    const vk::BufferMemoryBarrier2 reset_post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .buffer = fault_buffer.Handle(),
        .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_pre_barrier,
-    });
-    cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_post_barrier,
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_post_barrier,
     });
 
     // Defer creating buffers
@@ -1036,25 +1008,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
-void BufferCache::MemoryBarrier() {
-    // Vulkan doesn't know which buffer we access in a shader if we use
-    // BufferDeviceAddress. We need a full memory barrier.
-    // For now, we only read memory using BDA. If we want to write to it,
-    // we might need to change this.
-    scheduler.EndRendering();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    vk::MemoryBarrier2 barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .memoryBarrierCount = 1,
-        .pMemoryBarriers = &barrier,
-    });
-}
-
 void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
                                    u32 num_bytes) {
     scheduler.EndRendering();
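Taken together, the buffer-cache changes remove every transfer-stage touch of the fault path: the newly created buffer is no longer zeroed with fillBuffer, the staging slice for the download is cleared with a host-side std::memset right after mapping, the fault buffer itself is now reset by the processing shader (see the shader diff below), and the blanket MemoryBarrier() helper becomes redundant and is deleted. What remains is a single compute-to-compute barrier pair: the pre-barrier makes earlier shader writes to the fault bitmap visible to the processing dispatch, and the post-barrier orders that dispatch's reads and in-shader reset against the next pass's atomic marking writes. A condensed sketch of the resulting recording order, assuming vulkan-hpp built with VULKAN_HPP_NO_STRUCT_CONSTRUCTORS (as this codebase uses); the function and parameter names are stand-ins, not this project's API:

#include <cstring>
#include <vulkan/vulkan.hpp>

void RecordFaultProcessing(vk::CommandBuffer cmdbuf, vk::Buffer fault_buffer,
                           vk::DeviceSize fault_size, void* mapped_staging,
                           size_t staging_size) {
    // Host-side clear of the mapped staging slice replaces the old
    // fillBuffer() call and its transfer->compute barrier.
    std::memset(mapped_staging, 0, staging_size);
    const vk::BufferMemoryBarrier2 pre{
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
        .buffer = fault_buffer,
        .offset = 0,
        .size = fault_size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &pre,
    });
    // ... bind the fault-processing pipeline, push descriptors, dispatch ...
    // The dispatch reads each bitmap word and writes it back to zero; order
    // both against the next pass's atomic marking writes.
    const vk::BufferMemoryBarrier2 post{
        .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead,
        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
        .buffer = fault_buffer,
        .offset = 0,
        .size = fault_size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &post,
    });
}

Only writes need to be made available through srcAccessMask; listing eShaderRead on the post-barrier's source side documents the write-after-read hazard, which the execution dependency alone already covers.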

View File

@@ -159,9 +159,6 @@ public:
     /// Synchronizes all buffers needed for DMA.
     void SynchronizeDmaBuffers();
 
-    /// Record memory barrier. Used for buffers when accessed via BDA.
-    void MemoryBarrier();
-
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {

View File

@@ -24,9 +24,6 @@ layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
 void main() {
     uint id = gl_GlobalInvocationID.x;
     uint word = fault_buffer[id];
-    if (word == 0u) {
-        return;
-    }
     // 1 page per bit
     uint base_bit = id * 32u;
     while (word != 0u) {
@@ -39,4 +36,5 @@ void main() {
             download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
         }
     }
+    fault_buffer[id] = 0u;
 }
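On the shader side, the early-out for a zero word is dropped so that every invocation reaches the new final statement, fault_buffer[id] = 0u;, resetting its own bitmap word once it has been consumed. That in-shader reset is exactly what the compute-to-compute post-barrier above synchronizes, and it replaces the removed transfer-stage fill. The surviving loop is a standard set-bit walk: each of a word's 32 bits is one faulted page, consumed lowest-bit-first. The loop body is truncated in this view; a host-side C++ equivalent consistent with the visible lines (C++20 <bit>; the shader's store_index bookkeeping is elided):

#include <bit>
#include <cstdint>
#include <vector>

// Mirror of the shader loop: every set bit in a 32-bit fault word is one
// faulted page; emit its base address, clear the bit, and continue.
std::vector<uint64_t> CollectFaultPages(uint32_t word, uint32_t word_index,
                                        uint32_t caching_pagebits) {
    std::vector<uint64_t> pages;
    const uint32_t base_bit = word_index * 32u;
    while (word != 0u) {
        const uint32_t bit = uint32_t(std::countr_zero(word)); // findLSB in GLSL
        const uint64_t page = base_bit + bit;
        pages.push_back(page << caching_pagebits);
        word &= word - 1u; // clear the lowest set bit
    }
    return pages;
}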

View File

@@ -488,7 +488,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
                                  range.upper() - range.lower());
             }
         }
-        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;