mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 10:35:03 +00:00
buffer_cache: Simplify download copy generation
This commit is contained in:
parent
2ca34c4d94
commit
21f5d8e608
@ -621,7 +621,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
case PM4ItOpcode::EventWriteEop: {
|
case PM4ItOpcode::EventWriteEop: {
|
||||||
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
|
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
rasterizer->CommitAsyncFlushes();
|
rasterizer->CommitPendingDownloads();
|
||||||
}
|
}
|
||||||
++fence_tick;
|
++fence_tick;
|
||||||
event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
|
event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
|
||||||
@ -1023,7 +1023,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
|
|||||||
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
|
||||||
++fence_tick;
|
++fence_tick;
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
rasterizer->CommitAsyncFlushes();
|
rasterizer->CommitPendingDownloads();
|
||||||
}
|
}
|
||||||
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
|
release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
|
||||||
break;
|
break;
|
||||||
|
@ -187,13 +187,12 @@ void BufferCache::DownloadBufferMemory(const Buffer& buffer, VAddr device_addr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BufferCache::CommitAsyncFlushes() {
|
bool BufferCache::CommitPendingDownloads() {
|
||||||
if (pending_download_ranges.Empty()) {
|
if (pending_download_ranges.Empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
using BufferCopies = boost::container::small_vector<vk::BufferCopy, 8>;
|
using BufferCopies = boost::container::small_vector<vk::BufferCopy, 8>;
|
||||||
boost::container::small_vector<BufferCopies, 8> copies;
|
tsl::robin_map<BufferId, BufferCopies> copies;
|
||||||
boost::container::small_vector<BufferId, 8> buffer_ids;
|
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) {
|
pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) {
|
||||||
const std::size_t size = interval_upper - interval_lower;
|
const std::size_t size = interval_upper - interval_lower;
|
||||||
@ -203,27 +202,16 @@ bool BufferCache::CommitAsyncFlushes() {
|
|||||||
const VAddr buffer_end = buffer_start + buffer.SizeBytes();
|
const VAddr buffer_end = buffer_start + buffer.SizeBytes();
|
||||||
const VAddr new_start = std::max(buffer_start, device_addr);
|
const VAddr new_start = std::max(buffer_start, device_addr);
|
||||||
const VAddr new_end = std::min(buffer_end, device_addr + size);
|
const VAddr new_end = std::min(buffer_end, device_addr + size);
|
||||||
auto& buffer_copies = copies.emplace_back();
|
const u64 new_size = new_end - new_start;
|
||||||
buffer_ids.emplace_back(buffer_id);
|
copies[buffer_id].emplace_back(new_start - buffer_start, total_size_bytes, new_size);
|
||||||
memory_tracker.ForEachDownloadRange<false>(new_start, new_end - new_start,
|
// Align up to avoid cache conflicts
|
||||||
[&](u64 device_addr_out, u64 range_size) {
|
constexpr u64 align = std::hardware_destructive_interference_size;
|
||||||
const VAddr buffer_addr = buffer.CpuAddr();
|
constexpr u64 mask = ~(align - 1ULL);
|
||||||
const auto add_download = [&](VAddr start, VAddr end) {
|
total_size_bytes += (new_size + align - 1) & mask;
|
||||||
const u64 new_offset = start - buffer_addr;
|
|
||||||
const u64 new_size = end - start;
|
|
||||||
buffer_copies.emplace_back(new_offset, total_size_bytes, new_size);
|
|
||||||
// Align up to avoid cache conflicts
|
|
||||||
constexpr u64 align = std::hardware_destructive_interference_size;
|
|
||||||
constexpr u64 mask = ~(align - 1ULL);
|
|
||||||
total_size_bytes += (new_size + align - 1) & mask;
|
|
||||||
};
|
|
||||||
gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
|
|
||||||
add_download);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
pending_download_ranges.Clear();
|
pending_download_ranges.Clear();
|
||||||
if (copies.empty()) {
|
if (total_size_bytes == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const auto [download, offset] = download_buffer.Map(total_size_bytes);
|
const auto [download, offset] = download_buffer.Map(total_size_bytes);
|
||||||
@ -239,26 +227,23 @@ bool BufferCache::CommitAsyncFlushes() {
|
|||||||
.memoryBarrierCount = 1u,
|
.memoryBarrierCount = 1u,
|
||||||
.pMemoryBarriers = &read_barrier,
|
.pMemoryBarriers = &read_barrier,
|
||||||
});
|
});
|
||||||
for (s32 i = 0; i < buffer_ids.size(); ++i) {
|
for (auto it = copies.begin(); it != copies.end(); ++it) {
|
||||||
auto& buffer_copies = copies[i];
|
auto& buffer_copies = it.value();
|
||||||
if (buffer_copies.empty()) {
|
if (buffer_copies.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (auto& copy : buffer_copies) {
|
for (auto& copy : buffer_copies) {
|
||||||
copy.dstOffset += offset;
|
copy.dstOffset += offset;
|
||||||
}
|
}
|
||||||
const BufferId buffer_id = buffer_ids[i];
|
const BufferId buffer_id = it.key();
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies);
|
cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies);
|
||||||
}
|
}
|
||||||
scheduler.DeferOperation([this, download, offset, buffer_ids, copies]() {
|
scheduler.DeferOperation([this, download, offset, copies = std::move(copies)]() {
|
||||||
auto* memory = Core::Memory::Instance();
|
auto* memory = Core::Memory::Instance();
|
||||||
for (s32 i = 0; i < buffer_ids.size(); ++i) {
|
for (auto it = copies.begin(); it != copies.end(); ++it) {
|
||||||
auto& buffer_copies = copies[i];
|
auto& buffer_copies = it.value();
|
||||||
if (buffer_copies.empty()) {
|
const BufferId buffer_id = it.key();
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const BufferId buffer_id = buffer_ids[i];
|
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
for (auto& copy : buffer_copies) {
|
for (auto& copy : buffer_copies) {
|
||||||
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
|
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
|
||||||
|
@ -126,8 +126,8 @@ public:
|
|||||||
/// Performs buffer to buffer data copy on the GPU.
|
/// Performs buffer to buffer data copy on the GPU.
|
||||||
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
||||||
|
|
||||||
/// Schedules all GPU modified ranges since last commit to be copied back the host memory.
|
/// Schedules pending GPU modified ranges since last commit to be copied back to the host memory.
|
||||||
bool CommitAsyncFlushes();
|
bool CommitPendingDownloads();
|
||||||
|
|
||||||
/// Obtains a buffer for the specified region.
|
/// Obtains a buffer for the specified region.
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
||||||
|
@ -60,9 +60,9 @@ void Rasterizer::CpSync() {
|
|||||||
vk::DependencyFlagBits::eByRegion, ib_barrier, {}, {});
|
vk::DependencyFlagBits::eByRegion, ib_barrier, {}, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rasterizer::CommitAsyncFlushes() {
|
bool Rasterizer::CommitPendingDownloads() {
|
||||||
scheduler.PopPendingOperations();
|
scheduler.PopPendingOperations();
|
||||||
return buffer_cache.CommitAsyncFlushes();
|
return buffer_cache.CommitPendingDownloads();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rasterizer::FilterDraw() {
|
bool Rasterizer::FilterDraw() {
|
||||||
|
@ -65,7 +65,7 @@ public:
|
|||||||
void UnmapMemory(VAddr addr, u64 size);
|
void UnmapMemory(VAddr addr, u64 size);
|
||||||
|
|
||||||
void CpSync();
|
void CpSync();
|
||||||
bool CommitAsyncFlushes();
|
bool CommitPendingDownloads();
|
||||||
u64 Flush();
|
u64 Flush();
|
||||||
void Finish();
|
void Finish();
|
||||||
void ProcessFaults();
|
void ProcessFaults();
|
||||||
|
Loading…
Reference in New Issue
Block a user