Mirror of https://github.com/shadps4-emu/shadPS4.git

Cleanup part 1
commit 1fcd800c46 (parent e50b94183f)
@@ -204,8 +204,6 @@ void EmitContext::DefineArithmeticTypes() {
         // Used to calculate fault buffer position and mask
         u32_three_value = ConstU32(3U);
         u32_seven_value = ConstU32(7U);
-        bda_first_time_mask = Constant(U64, 0x1ULL);
-        bda_first_time_inv_mask = Constant(U64, ~0x1ULL);
     }
 }
 
@@ -175,12 +175,8 @@ public:
 
     template <typename Func>
     Id EmitMemoryAccess(Id type, Id address, Func&& fallback) {
-        const Id first_time_label = OpLabel();
-        const Id after_first_time_label = OpLabel();
-        const Id fallback_label = OpLabel();
+        const Id fault_label = OpLabel();
         const Id available_label = OpLabel();
-        const Id save_masked_label = OpLabel();
-        const Id after_save_masked_label = OpLabel();
         const Id merge_label = OpLabel();
 
         // Get page BDA
@@ -191,14 +187,13 @@ public:
         const Id bda_ptr = OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32);
         const Id bda = OpLoad(U64, bda_ptr);
 
-        // Check if it is the first time we access this page
-        const Id bda_and_mask = OpBitwiseAnd(U64, bda, bda_first_time_mask);
-        const Id first_time = OpIEqual(U1[1], bda_and_mask, u64_zero_value);
-        OpSelectionMerge(after_first_time_label, spv::SelectionControlMask::MaskNone);
-        OpBranchConditional(first_time, first_time_label, after_first_time_label);
+        // Check if the page is GPU mapped
+        const Id is_fault = OpIEqual(U1[1], bda, u64_zero_value);
+        OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        OpBranchConditional(is_fault, fault_label, available_label);
 
         // First time access
-        AddLabel(first_time_label);
+        AddLabel(fault_label);
         const auto& fault_buffer = buffers[fault_buffer_index];
         const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
         const Id page_div8 = OpShiftRightLogical(U32[1], page32, u32_three_value);
@@ -210,35 +205,14 @@ public:
         const Id page_mask8 = OpUConvert(U8, page_mask);
         const Id fault_value_masked = OpBitwiseOr(U8, fault_value, page_mask8);
         OpStore(fault_ptr, fault_value_masked);
-        OpBranch(after_first_time_label);
-
-        // Check if the value is available
-        AddLabel(after_first_time_label);
-        const Id bda_eq_zero = OpIEqual(U1[1], bda, u64_zero_value);
-        OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
-        OpBranchConditional(bda_eq_zero, fallback_label, available_label);
-
-        // Fallback (and mark on faul buffer)
-        AddLabel(fallback_label);
+        // Fallback (we are not able to access the page)
         const Id fallback_result = fallback();
         OpBranch(merge_label);
 
         // Value is available
         AddLabel(available_label);
-        OpSelectionMerge(after_save_masked_label, spv::SelectionControlMask::MaskNone);
-        OpBranchConditional(first_time, save_masked_label, after_save_masked_label);
-
-        // Save unmasked BDA
-        AddLabel(save_masked_label);
-        const Id masked_bda = OpBitwiseOr(U64, bda, bda_first_time_mask);
-        OpStore(bda_ptr, masked_bda);
-        OpBranch(after_save_masked_label);
-
-        // Load value
-        AddLabel(after_save_masked_label);
-        const Id unmasked_bda = OpBitwiseAnd(U64, bda, bda_first_time_inv_mask);
         const Id offset_in_bda = OpBitwiseAnd(U64, address, caching_pagemask_value);
-        const Id addr = OpIAdd(U64, unmasked_bda, offset_in_bda);
+        const Id addr = OpIAdd(U64, bda, offset_in_bda);
         const PointerType pointer_type = PointerTypeFromType(type);
         const Id pointer_type_id = physical_pointer_types[pointer_type];
         const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
@@ -247,8 +221,7 @@ public:
 
         // Merge
         AddLabel(merge_label);
-        const Id final_result =
-            OpPhi(type, fallback_result, fallback_label, result, after_save_masked_label);
+        const Id final_result = OpPhi(type, fallback_result, fault_label, result, available_label);
         return final_result;
     }
 
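To make the simplified control flow easier to follow, here is a CPU-side model of the logic the cleaned-up EmitMemoryAccess now generates. This is only a sketch: the container parameters and helper names stand in for the GPU-side BDA page table and fault buffer and are not the project's actual API, and kPageBits is an assumed value for CACHING_PAGEBITS.

#include <cstdint>
#include <functional>
#include <vector>

constexpr uint64_t kPageBits = 12; // assumed CACHING_PAGEBITS
constexpr uint64_t kPageMask = (1ULL << kPageBits) - 1;

uint32_t EmitMemoryAccessModel(const std::vector<uint64_t>& bda_page_table,
                               std::vector<uint8_t>& fault_bitmap, uint64_t address,
                               const std::function<uint32_t(uint64_t)>& load_at_device_address,
                               const std::function<uint32_t()>& fallback) {
    const uint64_t page = address >> kPageBits;
    const uint64_t bda = bda_page_table[page];
    if (bda == 0) {
        // Page is not GPU mapped: set the page's bit in the fault buffer
        // (the page_div8 / page_mask8 math in the emitter) and fall back.
        fault_bitmap[page >> 3] |= static_cast<uint8_t>(1u << (page & 7));
        return fallback();
    }
    // Page is mapped: with the "first time" low-bit tag gone, the BDA is used
    // directly, no masking before the add.
    return load_at_device_address(bda + (address & kPageMask));
}

The OpPhi at the merge label corresponds to returning either the fallback value or the loaded value in this model.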
@@ -292,8 +265,6 @@ public:
 
     Id caching_pagebits_value{};
     Id caching_pagemask_value{};
-    Id bda_first_time_mask{};
-    Id bda_first_time_inv_mask{};
 
     Id shared_u8{};
     Id shared_u16{};
@@ -135,14 +135,6 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
         if (unmap) {
             return;
         }
 
-        {
-            // This is a temporary workaround...
-            std::scoped_lock lock(dma_sync_ranges_mutex);
-            const VAddr aligned_addr = Common::AlignDown(device_addr, 4_KB);
-            const u64 aligned_size = Common::AlignUp(device_addr + size, 4_KB) - aligned_addr;
-            dma_sync_ranges.Add(aligned_addr, aligned_size);
-        }
     }
 }
 
@@ -553,10 +545,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     bda_addrs.reserve(size_pages);
     for (u64 i = 0; i < size_pages; ++i) {
         vk::DeviceAddress addr = new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS);
-        const bool is_dma_synced = page_table[start_page + i].is_dma_synced;
-        // Use LSB to mark if the page is DMA synced. If it is not synced, it
-        // we haven't accessed it yet.
-        bda_addrs.push_back(addr | (is_dma_synced ? 0x1 : 0x0));
+        bda_addrs.push_back(addr);
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
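For comparison, a minimal sketch of what a BDA page-table entry holds after this cleanup versus before it; the helper names are hypothetical and only restate the push_back lines in the hunk above.

#include <cstdint>

// After the cleanup: the entry is the page's raw buffer device address.
uint64_t MakeBdaEntry(uint64_t buffer_device_address, uint64_t page_index, uint64_t page_bits) {
    return buffer_device_address + (page_index << page_bits);
}

// Before the cleanup (removed above): the LSB doubled as an is_dma_synced tag,
// which EmitMemoryAccess then had to mask off before forming the address.
uint64_t MakeTaggedBdaEntry(uint64_t buffer_device_address, uint64_t page_index,
                            uint64_t page_bits, bool is_dma_synced) {
    return (buffer_device_address + (page_index << page_bits)) | (is_dma_synced ? 0x1u : 0x0u);
}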
@@ -568,10 +557,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
     Register(new_buffer_id);
-    {
-        std::scoped_lock lk(dma_sync_ranges_mutex);
-        dma_sync_ranges.Add(overlap.begin, overlap.end);
-    }
     return new_buffer_id;
 }
 
@@ -683,22 +668,17 @@ void BufferCache::ProcessFaultBuffer() {
             const VAddr fault_end = fault + CACHING_PAGESIZE; // This can be adjusted
             fault_ranges +=
                 boost::icl::interval_set<VAddr>::interval_type::right_open(fault, fault_end);
-            LOG_WARNING(Render_Vulkan, "First time DMA access to memory at page {:#x}", fault);
+            LOG_INFO(Render_Vulkan, "Accessed non-GPU mapped memory at {:#x}", fault);
         }
         for (const auto& range : fault_ranges) {
             const VAddr start = range.lower();
             const VAddr end = range.upper();
             const u64 page_start = start >> CACHING_PAGEBITS;
             const u64 page_end = Common::DivCeil(end, CACHING_PAGESIZE);
-            // Mark the pages as synced
-            for (u64 page = page_start; page < page_end; ++page) {
-                page_table[page].is_dma_synced = true;
-            }
             // Buffer size is in 32 bits
             ASSERT_MSG((range.upper() - range.lower()) <= std::numeric_limits<u32>::max(),
                        "Buffer size is too large");
-            // Only create a buffer is the current range doesn't fit in an existing one
-            FindBuffer(start, static_cast<u32>(end - start));
+            CreateBuffer(start, static_cast<u32>(end - start));
         }
     });
 }
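The loop above consumes faulting page addresses decoded from the downloaded fault buffer. As a sketch of that decode step, assuming the one-bit-per-page layout written by EmitMemoryAccess; the 64-bit word size and the helper name are illustrative, not the repository's actual code:

#include <bit>
#include <cstdint>
#include <span>
#include <vector>

// Walk the downloaded fault bitmap and return the base address of every page
// whose bit is set. CollectFaultPages is a hypothetical helper, not repo API.
std::vector<uint64_t> CollectFaultPages(std::span<const uint64_t> fault_words,
                                        uint64_t page_bits /* CACHING_PAGEBITS */) {
    std::vector<uint64_t> faults;
    for (size_t word_idx = 0; word_idx < fault_words.size(); ++word_idx) {
        uint64_t word = fault_words[word_idx];
        while (word != 0) {
            const int bit = std::countr_zero(word); // lowest set bit = faulting page
            const uint64_t page = word_idx * 64 + static_cast<uint64_t>(bit);
            faults.push_back(page << page_bits);    // page index -> page base address
            word &= word - 1;                       // clear the bit just handled
        }
    }
    return faults;
}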
@@ -923,18 +903,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
-void BufferCache::SynchronizeDmaBuffers() {
-    RENDERER_TRACE;
-    std::scoped_lock lk(dma_sync_ranges_mutex);
-    LOG_WARNING(Render_Vulkan, "Synchronizing ranges");
-    dma_sync_ranges.ForEach([&](VAddr device_addr, u64 end_addr) {
-        RENDERER_TRACE;
-        SynchronizeBuffersInRange(device_addr, end_addr - device_addr);
-        LOG_WARNING(Render_Vulkan, "Sync range {:#x} - {:#x}", device_addr, end_addr);
-    });
-    dma_sync_ranges.Clear();
-}
-
 void BufferCache::MemoryBarrier() {
     // Vulkan doesn't know which buffer we access in a shader if we use
     // BufferDeviceAddress. We need a full memory barrier.
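The comment above explains why a coarse barrier is needed: with buffer device addresses the driver cannot see which buffers a shader will dereference, so per-buffer barriers are not possible. A hedged sketch of such a global barrier follows; the real MemoryBarrier() implementation may use different stage and access masks.

#include <vulkan/vulkan.hpp>

// Record an "everything before -> everything after" barrier for the case where
// shaders access memory through BufferDeviceAddress.
void RecordFullMemoryBarrier(vk::CommandBuffer cmdbuf) {
    vk::MemoryBarrier2 barrier{};
    barrier.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands;
    barrier.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite;
    barrier.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands;
    barrier.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite;

    vk::DependencyInfo dep_info{};
    dep_info.memoryBarrierCount = 1;
    dep_info.pMemoryBarriers = &barrier;
    cmdbuf.pipelineBarrier2(dep_info);
}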
@@ -48,7 +48,6 @@ public:
 
     struct PageData {
         BufferId buffer_id{};
-        bool is_dma_synced = false;
     };
 
     struct Traits {
@@ -194,8 +193,6 @@ private:
     Buffer fault_buffer;
     std::shared_mutex slot_buffers_mutex;
     Common::SlotVector<Buffer> slot_buffers;
-    std::shared_mutex dma_sync_ranges_mutex;
-    RangeSet dma_sync_ranges;
     RangeSet gpu_modified_ranges;
     SplitRangeMap<BufferId> buffer_ranges;
     MemoryTracker memory_tracker;
@@ -477,9 +477,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
     if (uses_dma && !fault_process_pending) {
         // We only use fault buffer for DMA right now.
         {
-            // We don't want the mapped ranges to be modified while we are syncing
+            // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)
+            // we need to account for that and synchronize.
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            buffer_cache.SynchronizeDmaBuffers();
+            for (auto& range : mapped_ranges) {
+                buffer_cache.SynchronizeBuffersInRange(range.lower(),
+                                                       range.upper() - range.lower());
+            }
         }
         buffer_cache.MemoryBarrier();
     }
@@ -91,17 +91,14 @@ void Scheduler::Wait(u64 tick) {
     }
     master_semaphore.Wait(tick);
 
-    // TODO: We should be applyting pending operations here because that gives us
-    // the ability to use mapped regions on stream buffers in deferred operations.
-    // We don't do that right now because it might introduce varioations in the
-    // timing and, since we don't sync the GPU some games might be affected by that.
-    // It shouldn't be an issue right now, because we only use mapped regions in
-    // deferred operations to download faulted addresses. That is only 8KB every tick
-    // and the stream buffer is 256MB. GPU doesn't go that behind.
-    // while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
-    // pending_ops.front().callback();
-    // pending_ops.pop();
-    // }
+    // CAUTION: This can introduce unexpected variation in the wait time.
+    // We don't currently sync the GPU, and some games are very sensitive to this.
+    // If this becomes a problem, it can be commented out.
+    // Idealy we would implement proper gpu sync.
+    while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
+        pending_ops.front().callback();
+        pending_ops.pop();
+    }
 }
 
 void Scheduler::AllocateWorkerCommandBuffers() {