Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-07-22 18:15:14 +00:00)
Merge a5e8aa960d into 95a386308a
This commit is contained in: commit 17c3902fcf
@@ -1105,30 +1105,30 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view
 }
 
 Id EmitContext::DefineGetBdaPointer() {
-    const auto caching_pagebits{
+    const Id caching_pagebits{
         Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS))};
-    const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
+    const Id caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
 
-    const auto func_type{TypeFunction(U64, U64)};
-    const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
-    const auto address{OpFunctionParameter(U64)};
+    const Id func_type{TypeFunction(U64, U64)};
+    const Id func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
+    const Id address{OpFunctionParameter(U64)};
     Name(func, "get_bda_pointer");
     AddLabel();
 
-    const auto fault_label{OpLabel()};
-    const auto available_label{OpLabel()};
-    const auto merge_label{OpLabel()};
+    const Id fault_label{OpLabel()};
+    const Id available_label{OpLabel()};
+    const Id merge_label{OpLabel()};
 
     // Get page BDA
-    const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
-    const auto page32{OpUConvert(U32[1], page)};
     const auto& bda_buffer{buffers[bda_pagetable_index]};
     const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
-    const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
-    const auto bda{OpLoad(U64, bda_ptr)};
+    const Id page{OpShiftRightLogical(U64, address, caching_pagebits)};
+    const Id page32{OpUConvert(U32[1], page)};
+    const Id bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
+    const Id bda{OpLoad(U64, bda_ptr)};
 
     // Check if page is GPU cached
-    const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
+    const Id is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
     OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
     OpBranchConditional(is_fault, fault_label, available_label);
 
@@ -1136,28 +1136,26 @@ Id EmitContext::DefineGetBdaPointer() {
     AddLabel(fault_label);
     const auto& fault_buffer{buffers[fault_buffer_index]};
     const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32);
-    const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
-    const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
-    const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
-    const auto fault_ptr{
+    const Id page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
+    const Id page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
+    const Id page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
+    const Id fault_ptr{
         OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)};
-    const auto fault_value{OpLoad(U32[1], fault_ptr)};
-    const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)};
-    OpStore(fault_ptr, fault_value_masked);
+    OpAtomicOr(U32[1], fault_ptr, ConstU32(u32(spv::Scope::Device)), u32_zero_value, page_mask);
 
     // Return null pointer
-    const auto fallback_result{u64_zero_value};
+    const Id fallback_result{u64_zero_value};
     OpBranch(merge_label);
 
     // Value is available, compute address
     AddLabel(available_label);
-    const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
-    const auto addr{OpIAdd(U64, bda, offset_in_bda)};
+    const Id offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
+    const Id addr{OpIAdd(U64, bda, offset_in_bda)};
     OpBranch(merge_label);
 
     // Merge
     AddLabel(merge_label);
-    const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
+    const Id result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
     OpReturnValue(result);
     OpFunctionEnd();
     return func;
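For readers who do not follow the SPIR-V builder calls above, the following is a rough host-side C++ equivalent of what the emitted get_bda_pointer function computes at runtime. It is a sketch, not the emulator's code: the concrete CACHING_PAGEBITS value and the pagetable/bitmap containers are illustrative stand-ins for the real VideoCore::BufferCache state.

#include <atomic>
#include <cstdint>
#include <vector>

// Illustrative stand-ins; the real constants and buffers live in VideoCore::BufferCache.
constexpr uint64_t CACHING_PAGEBITS = 12;
constexpr uint64_t CACHING_PAGEMASK = (uint64_t{1} << CACHING_PAGEBITS) - 1;

// bda_pagetable: one 64-bit device address per page (0 = page not GPU-cached).
// fault_bits:    one bit per page, set when a shader touches an uncached page.
uint64_t GetBdaPointer(uint64_t address, const std::vector<uint64_t>& bda_pagetable,
                       std::vector<std::atomic<uint32_t>>& fault_bits) {
    const uint32_t page = static_cast<uint32_t>(address >> CACHING_PAGEBITS);
    const uint64_t bda = bda_pagetable[page];
    if (bda == 0) {
        // Fault path: mark the page in the fault bitmap and return a null pointer.
        // The emitted SPIR-V now does this with OpAtomicOr instead of the previous
        // plain load / bitwise-or / store sequence.
        fault_bits[page >> 5].fetch_or(1u << (page & 31u));
        return 0;
    }
    // Available path: page base address plus the offset within the page.
    return bda + (address & CACHING_PAGEMASK);
}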
@@ -657,10 +657,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
-    const size_t size_bytes = new_buffer.SizeBytes();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -670,8 +666,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
 
 void BufferCache::ProcessFaultBuffer() {
     // Run fault processing shader
-    const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
-    vk::BufferMemoryBarrier2 fault_buffer_barrier{
+    static constexpr size_t StagingSize = MaxPageFaults * sizeof(u64);
+    const auto [mapped, offset] = download_buffer.Map(StagingSize);
+    std::memset(mapped, 0, StagingSize);
+    const vk::BufferMemoryBarrier2 fault_buffer_pre_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
         .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
@@ -680,27 +678,17 @@ void BufferCache::ProcessFaultBuffer() {
         .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
-    vk::BufferMemoryBarrier2 download_barrier{
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
-        .buffer = download_buffer.Handle(),
-        .offset = offset,
-        .size = MaxPageFaults * sizeof(u64),
-    };
-    std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_buffer_barrier, download_barrier};
-    vk::DescriptorBufferInfo fault_buffer_info{
+    const vk::DescriptorBufferInfo fault_buffer_info{
         .buffer = fault_buffer.Handle(),
         .offset = 0,
         .range = FAULT_BUFFER_SIZE,
     };
-    vk::DescriptorBufferInfo download_info{
+    const vk::DescriptorBufferInfo download_info{
         .buffer = download_buffer.Handle(),
         .offset = offset,
-        .range = MaxPageFaults * sizeof(u64),
+        .range = StagingSize,
     };
-    boost::container::small_vector<vk::WriteDescriptorSet, 2> writes{
+    const std::array<vk::WriteDescriptorSet, 2> writes{{
         {
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = 0,
@@ -717,15 +705,14 @@ void BufferCache::ProcessFaultBuffer() {
             .descriptorType = vk::DescriptorType::eStorageBuffer,
             .pBufferInfo = &download_info,
         },
-    };
+    }};
     download_buffer.Commit();
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0);
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 2,
-        .pBufferMemoryBarriers = barriers.data(),
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_pre_barrier,
     });
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
     cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
@@ -735,34 +722,19 @@ void BufferCache::ProcessFaultBuffer() {
     cmdbuf.dispatch(num_workgroups, 1, 1);
 
     // Reset fault buffer
-    const vk::BufferMemoryBarrier2 reset_pre_barrier = {
+    const vk::BufferMemoryBarrier2 fault_buffer_post_barrier{
         .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
-        .srcAccessMask = vk::AccessFlagBits2::eShaderRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = fault_buffer.Handle(),
-        .offset = 0,
-        .size = FAULT_BUFFER_SIZE,
-    };
-    const vk::BufferMemoryBarrier2 reset_post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
         .buffer = fault_buffer.Handle(),
         .offset = 0,
         .size = FAULT_BUFFER_SIZE,
     };
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_pre_barrier,
-    });
-    cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &reset_post_barrier,
+        .bufferMemoryBarrierCount = 1U,
+        .pBufferMemoryBarriers = &fault_buffer_post_barrier,
     });
 
     // Defer creating buffers
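Condensed, the rewritten ProcessFaultBuffer brackets a single compute dispatch with one barrier on each side; the transfer-stage fillBuffer resets and their extra barriers are gone because the staging region is zeroed on the CPU and the shader clears its own fault word. The sketch below shows only that barrier/dispatch pattern under stated assumptions: the function name, the bare buffer/size/workgroup parameters, and the pre-barrier's destination access mask (not visible in the hunks above) are illustrative, not the emulator's exact code.

#include <cstdint>
#include <vulkan/vulkan.hpp>

// Sketch of the synchronization pattern used around the fault-processing dispatch.
void RecordFaultPass(vk::CommandBuffer cmdbuf, vk::Buffer fault_buffer, vk::DeviceSize size,
                     uint32_t num_workgroups) {
    // Make earlier shader writes to the fault bitmap visible to the parsing shader.
    const vk::BufferMemoryBarrier2 pre_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
        .buffer = fault_buffer,
        .offset = 0,
        .size = size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &pre_barrier,
    });

    // ... bind the compute pipeline and push the two storage-buffer descriptors ...
    cmdbuf.dispatch(num_workgroups, 1, 1);

    // Order the shader's in-place reset of the bitmap against the next pass's writes.
    const vk::BufferMemoryBarrier2 post_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite | vk::AccessFlagBits2::eShaderRead,
        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
        .buffer = fault_buffer,
        .offset = 0,
        .size = size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &post_barrier,
    });
}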
@@ -1036,25 +1008,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
-void BufferCache::MemoryBarrier() {
-    // Vulkan doesn't know which buffer we access in a shader if we use
-    // BufferDeviceAddress. We need a full memory barrier.
-    // For now, we only read memory using BDA. If we want to write to it,
-    // we might need to change this.
-    scheduler.EndRendering();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    vk::MemoryBarrier2 barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .memoryBarrierCount = 1,
-        .pMemoryBarriers = &barrier,
-    });
-}
-
 void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
                                    u32 num_bytes) {
     scheduler.EndRendering();
@@ -159,9 +159,6 @@ public:
     /// Synchronizes all buffers neede for DMA.
     void SynchronizeDmaBuffers();
 
-    /// Record memory barrier. Used for buffers when accessed via BDA.
-    void MemoryBarrier();
-
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -24,9 +24,6 @@ layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
 void main() {
     uint id = gl_GlobalInvocationID.x;
     uint word = fault_buffer[id];
-    if (word == 0u) {
-        return;
-    }
     // 1 page per bit
     uint base_bit = id * 32u;
     while (word != 0u) {
@@ -39,4 +36,5 @@ void main() {
             download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
         }
     }
+    fault_buffer[id] = 0u;
 }
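The hunks above omit the body of the shader's bit-scan loop. As a reading aid, here is a host-side C++ sketch of the same per-word processing; the findLSB-style scan, the clearing of the lowest set bit, and the plain output counter are assumptions about the elided body, not quoted code.

#include <bit>
#include <cstdint>
#include <vector>

constexpr uint64_t CACHING_PAGEBITS = 12; // illustrative value

// Processes one 32-bit fault word: every set bit is a faulted page whose base
// virtual address is appended to the download list, after which the word is
// cleared (mirroring the new `fault_buffer[id] = 0u;` line in the shader).
void ProcessFaultWord(uint32_t id, std::vector<uint32_t>& fault_words,
                      std::vector<uint64_t>& download, size_t& store_index) {
    uint32_t word = fault_words[id];
    const uint32_t base_bit = id * 32u; // 1 page per bit
    while (word != 0u) {
        const uint32_t bit = static_cast<uint32_t>(std::countr_zero(word)); // findLSB equivalent
        word &= word - 1u;                                                  // clear lowest set bit
        const uint64_t page = base_bit + bit;
        download[store_index++] = page << CACHING_PAGEBITS;
    }
    fault_words[id] = 0u;
}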
@@ -488,7 +488,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
                                  range.upper() - range.lower());
             }
         }
-        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;