mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-22 18:15:14 +00:00
buffer_cache: Use separate thread for handling downloads
This commit is contained in:
parent
0d83fbf61b
commit
1af9e17176
@ -166,14 +166,21 @@ void EmitGetGotoVariable(EmitContext&) {
|
||||
using PointerType = EmitContext::PointerType;
|
||||
|
||||
/// Emits a constant read that is served from the flattened constant buffer.
///
/// The flatbuf is expected to be the last entry of ctx.buffers (asserted below). The element
/// index comes from the instruction flags and is used to index the buffer's U32 view.
///
/// @param ctx    Emit context providing the buffer bindings and SPIR-V builders.
/// @param inst   IR instruction whose u32 flags carry the flatbuf offset.
/// @param addr   Currently unused; only needed by the disabled fallback noted below.
/// @param offset Currently unused; only needed by the disabled fallback noted below.
/// @return Id of the loaded 32-bit value.
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
    const u32 flatbuf_offset = inst->Flags<u32>();
    const auto& flatbuf_buffer{ctx.buffers.back()};
    ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
    const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
    const auto ptr{ctx.OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, ctx.u32_zero_value,
                                     ctx.ConstU32(flatbuf_offset))};
    return ctx.OpLoad(ctx.U32[1], ptr);

    // The read_const fallback is disabled for now; it used to sit after the return above where
    // it could never execute. Kept here for reference.
    // We can only provide a fallback for immediate offsets.
    // if (flatbuf_offset == 0) {
    //     return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
    // } else {
    //     return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset,
    //                               ctx.ConstU32(flatbuf_offset));
    // }
}
|
||||
|
||||
template <PointerType type>
|
||||
|
@ -136,6 +136,10 @@ void CollectShaderInfoPass(IR::Program& program) {
|
||||
}
|
||||
}
|
||||
|
||||
program.info.readconst_types = Info::ReadConstType::None;
|
||||
program.info.dma_types = IR::Type::Void;
|
||||
return;
|
||||
|
||||
if (program.info.dma_types != IR::Type::Void) {
|
||||
program.info.buffers.push_back({
|
||||
.used_types = IR::Type::U64,
|
||||
|
@ -421,8 +421,8 @@ struct PM4CmdEventWriteEop {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
u32 event_control;
|
||||
BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR
|
||||
BitField<8, 4, u32> event_index; ///< Event index
|
||||
BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR
|
||||
BitField<8, 4, u32> event_index; ///< Event index
|
||||
};
|
||||
u32 address_lo;
|
||||
union {
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/thread.h"
|
||||
#include "common/types.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
@ -27,7 +28,7 @@ static constexpr size_t UboStreamBufferSize = 128_MB;
|
||||
static constexpr size_t DownloadBufferSize = 128_MB;
|
||||
static constexpr size_t DeviceBufferSize = 128_MB;
|
||||
static constexpr size_t MaxPageFaults = 1024;
|
||||
static constexpr size_t DownloadSizeThreshold = 1_MB;
|
||||
static constexpr size_t DownloadSizeThreshold = 512_KB;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
@ -128,16 +129,24 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
"Fault Buffer Parser Pipeline");
|
||||
|
||||
instance.GetDevice().destroyShaderModule(module);
|
||||
|
||||
async_download_thread = std::jthread{std::bind_front(&BufferCache::DownloadThread, this)};
|
||||
}
|
||||
|
||||
BufferCache::~BufferCache() = default;
|
||||
|
||||
/// Handles a CPU-side invalidation of the range [device_addr, device_addr + size).
///
/// Untracked ranges are ignored. For tracked ranges the call first blocks until every pending
/// download that targets a page of the range has been written back, and only then marks the
/// range as CPU modified.
///
/// @param device_addr Start address of the invalidated range.
/// @param size        Size of the invalidated range in bytes.
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
    if (!IsRegionRegistered(device_addr, size)) {
        return;
    }

    // CommitPendingDownloads stamps each page of a downloaded range with its own tick, so a
    // range spanning several pages has to wait on the highest tick found, not just on the tick
    // of the first page.
    const u64 page_begin = device_addr >> CACHING_PAGEBITS;
    const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
    u64 target_tick = page_table[page_begin].target_tick;
    for (u64 page = page_begin + 1; page < page_end; ++page) {
        const u64 page_tick = page_table[page].target_tick;
        if (page_tick > target_tick) {
            target_tick = page_tick;
        }
    }
    WaitForTargetTick(target_tick);

    // Mark the page as CPU modified to stop tracking writes.
    memory_tracker.MarkRegionAsCpuModified(device_addr, size);
}
|
||||
|
||||
void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
|
||||
@ -215,6 +224,11 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
|
||||
pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) {
|
||||
const std::size_t size = interval_upper - interval_lower;
|
||||
const VAddr device_addr = interval_lower;
|
||||
const u64 page_begin = device_addr >> CACHING_PAGEBITS;
|
||||
const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
|
||||
for (u64 page = page_begin; page != page_end; ++page) {
|
||||
page_table[page].target_tick = current_download_tick;
|
||||
}
|
||||
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||
const VAddr buffer_start = buffer.CpuAddr();
|
||||
const VAddr buffer_end = buffer_start + buffer.SizeBytes();
|
||||
@ -257,14 +271,14 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies);
|
||||
}
|
||||
scheduler.DeferOperation([this, download, offset, copies]() {
|
||||
const auto writeback_host = [this, download, offset, copies = std::move(copies)]() {
|
||||
auto* memory = Core::Memory::Instance();
|
||||
for (auto it = copies.begin(); it != copies.end(); ++it) {
|
||||
auto& buffer_copies = it.value();
|
||||
const BufferId buffer_id = it.key();
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
const VAddr buffer_base = slot_buffers[buffer_id].CpuAddr();
|
||||
for (auto& copy : buffer_copies) {
|
||||
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
|
||||
const VAddr copy_device_addr = buffer_base + copy.srcOffset;
|
||||
const u64 dst_offset = copy.dstOffset - offset;
|
||||
if (!memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr),
|
||||
download + dst_offset, copy.size)) {
|
||||
@ -273,12 +287,18 @@ bool BufferCache::CommitPendingDownloads(bool wait_done) {
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
if (wait_done) {
|
||||
scheduler.Finish();
|
||||
} else {
|
||||
scheduler.Flush();
|
||||
};
|
||||
{
|
||||
std::scoped_lock lk{queue_mutex};
|
||||
async_downloads.emplace(std::move(writeback_host), scheduler.CurrentTick(),
|
||||
current_download_tick);
|
||||
}
|
||||
queue_cv.notify_one();
|
||||
scheduler.Flush();
|
||||
if (wait_done) {
|
||||
WaitForTargetTick(current_download_tick);
|
||||
}
|
||||
++current_download_tick;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1233,4 +1253,29 @@ void BufferCache::DeleteBuffer(BufferId buffer_id) {
|
||||
buffer.is_deleted = true;
|
||||
}
|
||||
|
||||
/// Worker loop of the asynchronous download thread.
///
/// Takes queued PendingDownload entries one at a time. For each entry it waits on the scheduler
/// for the entry's gpu_tick, runs the entry's callback, and then stores the entry's signal_tick
/// into download_tick and notifies all waiters of that atomic.
///
/// @param stoken Stop token of the owning thread; the loop exits once a stop has been requested.
void BufferCache::DownloadThread(std::stop_token stoken) {
    // Name the thread after what it does (buffer downloads), not after the GPU command processor.
    Common::SetCurrentThreadName("shadPS4:BufferDownload");

    while (!stoken.stop_requested()) {
        PendingDownload download;
        {
            std::unique_lock lk{queue_mutex};
            Common::CondvarWait(queue_cv, lk, stoken, [this] { return !async_downloads.empty(); });
            // The wait also returns on a stop request; anything still queued is left unprocessed.
            if (stoken.stop_requested()) {
                break;
            }
            download = std::move(async_downloads.front());
            async_downloads.pop();
        }

        // Wait for GPU to complete its work and writeback data to host
        scheduler.Wait(download.gpu_tick);
        download.callback();

        // Signal completion of download
        download_tick.store(download.signal_tick);
        download_tick.notify_all();
    }
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -3,10 +3,15 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <shared_mutex>
|
||||
#include <thread>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <queue>
|
||||
#include "common/slot_vector.h"
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "video_core/buffer_cache/buffer.h"
|
||||
#include "video_core/buffer_cache/memory_tracker.h"
|
||||
#include "video_core/buffer_cache/range_set.h"
|
||||
@ -51,7 +56,7 @@ public:
|
||||
|
||||
struct PageData {
|
||||
BufferId buffer_id{};
|
||||
u64 fence_tick;
|
||||
u64 target_tick{};
|
||||
};
|
||||
|
||||
struct Traits {
|
||||
@ -176,6 +181,14 @@ private:
|
||||
return !buffer_id || slot_buffers[buffer_id].is_deleted;
|
||||
}
|
||||
|
||||
/// Blocks the caller until download_tick holds a value that is at least target_tick.
/// Returns right away when that is already the case.
inline void WaitForTargetTick(u64 target_tick) {
    for (u64 observed = download_tick.load(); observed < target_tick;
         observed = download_tick.load()) {
        // Sleep until download_tick no longer holds the value that was just observed.
        download_tick.wait(observed);
    }
}
|
||||
|
||||
void DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size);
|
||||
|
||||
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
|
||||
@ -201,6 +214,8 @@ private:
|
||||
|
||||
void DeleteBuffer(BufferId buffer_id);
|
||||
|
||||
void DownloadThread(std::stop_token token);
|
||||
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
@ -224,6 +239,17 @@ private:
|
||||
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
|
||||
vk::UniquePipeline fault_process_pipeline;
|
||||
vk::UniquePipelineLayout fault_process_pipeline_layout;
|
||||
std::jthread async_download_thread;
|
||||
struct PendingDownload {
|
||||
Common::UniqueFunction<void> callback;
|
||||
u64 gpu_tick;
|
||||
u64 signal_tick;
|
||||
};
|
||||
std::mutex queue_mutex;
|
||||
std::condition_variable_any queue_cv;
|
||||
std::queue<PendingDownload> async_downloads;
|
||||
u64 current_download_tick{0};
|
||||
std::atomic<u64> download_tick{1};
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
Loading…
Reference in New Issue
Block a user