buffer_cache: Use separate thread for handling downloads

This commit is contained in:
IndecisiveTurtle 2025-06-24 01:30:02 +03:00
parent 0d83fbf61b
commit 1af9e17176
5 changed files with 104 additions and 22 deletions

View File

@ -166,14 +166,21 @@ void EmitGetGotoVariable(EmitContext&) {
using PointerType = EmitContext::PointerType; using PointerType = EmitContext::PointerType;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>(); const u32 flatbuf_offset = inst->Flags<u32>();
const auto& flatbuf_buffer{ctx.buffers.back()};
ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
const auto ptr{ctx.OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, ctx.u32_zero_value,
ctx.ConstU32(flatbuf_offset))};
return ctx.OpLoad(ctx.U32[1], ptr);
// We can only provide a fallback for immediate offsets. // We can only provide a fallback for immediate offsets.
if (flatbuf_off_dw == 0) { // if (flatbuf_off_dw == 0) {
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset); // return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
} else { //} else {
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset, // return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset,
ctx.ConstU32(flatbuf_off_dw)); // ctx.ConstU32(flatbuf_off_dw));
} //}
} }
template <PointerType type> template <PointerType type>

View File

@ -136,6 +136,10 @@ void CollectShaderInfoPass(IR::Program& program) {
} }
} }
program.info.readconst_types = Info::ReadConstType::None;
program.info.dma_types = IR::Type::Void;
return;
if (program.info.dma_types != IR::Type::Void) { if (program.info.dma_types != IR::Type::Void) {
program.info.buffers.push_back({ program.info.buffers.push_back({
.used_types = IR::Type::U64, .used_types = IR::Type::U64,

View File

@ -7,6 +7,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/debug.h" #include "common/debug.h"
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/thread.h"
#include "common/types.h" #include "common/types.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
@ -27,7 +28,7 @@ static constexpr size_t UboStreamBufferSize = 128_MB;
static constexpr size_t DownloadBufferSize = 128_MB; static constexpr size_t DownloadBufferSize = 128_MB;
static constexpr size_t DeviceBufferSize = 128_MB; static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024; static constexpr size_t MaxPageFaults = 1024;
static constexpr size_t DownloadSizeThreshold = 1_MB; static constexpr size_t DownloadSizeThreshold = 512_KB;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
@ -128,17 +129,25 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
"Fault Buffer Parser Pipeline"); "Fault Buffer Parser Pipeline");
instance.GetDevice().destroyShaderModule(module); instance.GetDevice().destroyShaderModule(module);
async_download_thread = std::jthread{std::bind_front(&BufferCache::DownloadThread, this)};
} }
BufferCache::~BufferCache() = default; BufferCache::~BufferCache() = default;
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
const bool is_tracked = IsRegionRegistered(device_addr, size); const bool is_tracked = IsRegionRegistered(device_addr, size);
if (is_tracked) { if (!is_tracked) {
return;
}
// Wait for any pending downloads to this page.
const u64 target_tick = page_table[device_addr >> CACHING_PAGEBITS].target_tick;
WaitForTargetTick(target_tick);
// Mark the page as CPU modified to stop tracking writes. // Mark the page as CPU modified to stop tracking writes.
memory_tracker.MarkRegionAsCpuModified(device_addr, size); memory_tracker.MarkRegionAsCpuModified(device_addr, size);
} }
}
void BufferCache::ReadMemory(VAddr device_addr, u64 size) { void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
@ -215,6 +224,11 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) { pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) {
const std::size_t size = interval_upper - interval_lower; const std::size_t size = interval_upper - interval_lower;
const VAddr device_addr = interval_lower; const VAddr device_addr = interval_lower;
const u64 page_begin = device_addr >> CACHING_PAGEBITS;
const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
for (u64 page = page_begin; page != page_end; ++page) {
page_table[page].target_tick = current_download_tick;
}
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
const VAddr buffer_start = buffer.CpuAddr(); const VAddr buffer_start = buffer.CpuAddr();
const VAddr buffer_end = buffer_start + buffer.SizeBytes(); const VAddr buffer_end = buffer_start + buffer.SizeBytes();
@ -257,14 +271,14 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
Buffer& buffer = slot_buffers[buffer_id]; Buffer& buffer = slot_buffers[buffer_id];
cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies); cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies);
} }
scheduler.DeferOperation([this, download, offset, copies]() { const auto writeback_host = [this, download, offset, copies = std::move(copies)]() {
auto* memory = Core::Memory::Instance(); auto* memory = Core::Memory::Instance();
for (auto it = copies.begin(); it != copies.end(); ++it) { for (auto it = copies.begin(); it != copies.end(); ++it) {
auto& buffer_copies = it.value(); auto& buffer_copies = it.value();
const BufferId buffer_id = it.key(); const BufferId buffer_id = it.key();
Buffer& buffer = slot_buffers[buffer_id]; const VAddr buffer_base = slot_buffers[buffer_id].CpuAddr();
for (auto& copy : buffer_copies) { for (auto& copy : buffer_copies) {
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; const VAddr copy_device_addr = buffer_base + copy.srcOffset;
const u64 dst_offset = copy.dstOffset - offset; const u64 dst_offset = copy.dstOffset - offset;
if (!memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), if (!memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr),
download + dst_offset, copy.size)) { download + dst_offset, copy.size)) {
@ -273,12 +287,18 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
} }
} }
} }
}); };
if (wait_done) { {
scheduler.Finish(); std::scoped_lock lk{queue_mutex};
} else { async_downloads.emplace(std::move(writeback_host), scheduler.CurrentTick(),
scheduler.Flush(); current_download_tick);
} }
queue_cv.notify_one();
scheduler.Flush();
if (wait_done) {
WaitForTargetTick(current_download_tick);
}
++current_download_tick;
return true; return true;
} }
@ -1233,4 +1253,29 @@ void BufferCache::DeleteBuffer(BufferId buffer_id) {
buffer.is_deleted = true; buffer.is_deleted = true;
} }
// Worker loop for the asynchronous download thread. Pops PendingDownload
// entries queued by CommitPendingDownloads, waits for the GPU copy to finish,
// runs the host writeback callback, then publishes the completed tick so
// WaitForTargetTick callers can make progress.
void BufferCache::DownloadThread(std::stop_token stoken) {
    // Fix: the previous label "shadPS4:GpuCommandProcessor" was copy-pasted
    // from the Liverpool command-processor thread and mislabeled this worker
    // in debuggers/profilers.
    Common::SetCurrentThreadName("shadPS4:BufferCacheDownload");
    while (!stoken.stop_requested()) {
        PendingDownload download;
        {
            std::unique_lock lk{queue_mutex};
            // Stop-token-aware wait: wakes on new work or on stop request.
            Common::CondvarWait(queue_cv, lk, stoken, [this] { return !async_downloads.empty(); });
            if (stoken.stop_requested()) {
                break;
            }
            // Take ownership of the entry while still under the lock.
            download = std::move(async_downloads.front());
            async_downloads.pop();
        }
        // Wait for GPU to complete its work and writeback data to host.
        scheduler.Wait(download.gpu_tick);
        download.callback();
        // Signal completion of download; notify_all wakes WaitForTargetTick.
        download_tick.store(download.signal_tick);
        download_tick.notify_all();
    }
}
} // namespace VideoCore } // namespace VideoCore

View File

@ -3,10 +3,15 @@
#pragma once #pragma once
#include <atomic>
#include <condition_variable>
#include <shared_mutex> #include <shared_mutex>
#include <thread>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <queue>
#include "common/slot_vector.h" #include "common/slot_vector.h"
#include "common/types.h" #include "common/types.h"
#include "common/unique_function.h"
#include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker.h" #include "video_core/buffer_cache/memory_tracker.h"
#include "video_core/buffer_cache/range_set.h" #include "video_core/buffer_cache/range_set.h"
@ -51,7 +56,7 @@ public:
struct PageData { struct PageData {
BufferId buffer_id{}; BufferId buffer_id{};
u64 fence_tick; u64 target_tick{};
}; };
struct Traits { struct Traits {
@ -176,6 +181,14 @@ private:
return !buffer_id || slot_buffers[buffer_id].is_deleted; return !buffer_id || slot_buffers[buffer_id].is_deleted;
} }
// Blocks the caller until the download thread has published a tick that is
// at least target_tick. Uses C++20 atomic wait to avoid spinning; re-checks
// after every wake because atomic::wait may return spuriously.
inline void WaitForTargetTick(u64 target_tick) {
    for (u64 seen = download_tick.load(); seen < target_tick; seen = download_tick.load()) {
        download_tick.wait(seen);
    }
}
void DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size); void DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size);
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
@ -201,6 +214,8 @@ private:
void DeleteBuffer(BufferId buffer_id); void DeleteBuffer(BufferId buffer_id);
void DownloadThread(std::stop_token token);
const Vulkan::Instance& instance; const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler; Vulkan::Scheduler& scheduler;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
@ -224,6 +239,17 @@ private:
vk::UniqueDescriptorSetLayout fault_process_desc_layout; vk::UniqueDescriptorSetLayout fault_process_desc_layout;
vk::UniquePipeline fault_process_pipeline; vk::UniquePipeline fault_process_pipeline;
vk::UniquePipelineLayout fault_process_pipeline_layout; vk::UniquePipelineLayout fault_process_pipeline_layout;
// Worker thread running DownloadThread; auto-joined and cancelled via its
// stop_token on destruction.
std::jthread async_download_thread;
// One queued asynchronous download: host writeback work plus the ticks that
// order it against the GPU scheduler and against CPU-side waiters.
struct PendingDownload {
// Host-side writeback to run once the GPU copy has completed.
Common::UniqueFunction<void> callback;
// Scheduler tick to wait on before the staging data is valid.
u64 gpu_tick;
// Value published to download_tick when this download finishes.
u64 signal_tick;
};
// Guards async_downloads; queue_cv is signalled after pushing under the lock.
std::mutex queue_mutex;
// condition_variable_any so Common::CondvarWait can use a stop_token.
std::condition_variable_any queue_cv;
std::queue<PendingDownload> async_downloads;
// Tick assigned to the next committed download batch (producer side).
u64 current_download_tick{0};
// Last completed download tick, published by the worker; waited on via
// atomic wait/notify in WaitForTargetTick.
std::atomic<u64> download_tick{1};
}; };
} // namespace VideoCore } // namespace VideoCore