mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-07-27 20:44:28 +00:00
Buffer syncing, faulted readback and BDA in Buffer
This commit is contained in:
parent 83255ee68f
commit 94a078207f
@@ -70,8 +70,10 @@ UniqueBuffer::~UniqueBuffer() {
 
 void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usage,
                           VmaAllocationInfo* out_alloc_info) {
+    const bool with_bda = bool(buffer_ci.usage & vk::BufferUsageFlagBits::eShaderDeviceAddress);
+    const VmaAllocationCreateFlags bda_flag = with_bda ? VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT : 0;
     const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | bda_flag | MemoryUsageVmaFlags(usage),
         .usage = MemoryUsageVma(usage),
         .requiredFlags = 0,
         .preferredFlags = MemoryUsagePreferredVmaFlags(usage),
@@ -86,6 +88,15 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa
     ASSERT_MSG(result == VK_SUCCESS, "Failed allocating buffer with error {}",
                vk::to_string(vk::Result{result}));
     buffer = vk::Buffer{unsafe_buffer};
+
+    if (with_bda) {
+        vk::BufferDeviceAddressInfo bda_info{
+            .buffer = buffer,
+        };
+        auto bda_result = device.getBufferAddress(bda_info);
+        ASSERT_MSG(bda_result != 0, "Failed to get buffer device address");
+        bda_addr = bda_result;
+    }
 }
 
 Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, MemoryUsage usage_,
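The device-address query above only succeeds when the buffer was created with vk::BufferUsageFlagBits::eShaderDeviceAddress and the Vulkan bufferDeviceAddress feature is enabled; when allocating through VMA, the allocator must also have been created with VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT. A minimal caller-side sketch of the new path; `unique_buffer` stands for an already-constructed UniqueBuffer whose device and allocator setup is not part of this diff:

    vk::BufferCreateInfo buffer_ci{
        .size = 64_KB,
        .usage = vk::BufferUsageFlagBits::eStorageBuffer |
                 vk::BufferUsageFlagBits::eTransferDst |
                 vk::BufferUsageFlagBits::eShaderDeviceAddress,
    };
    VmaAllocationInfo alloc_info{};
    unique_buffer.Create(buffer_ci, MemoryUsage::DeviceLocal, &alloc_info);
    // bda_addr is only populated on the with_bda branch above.
    ASSERT_MSG(unique_buffer.bda_addr != 0, "Buffer was not created with a device address");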
@@ -68,6 +68,7 @@ struct UniqueBuffer {
     VmaAllocator allocator;
     VmaAllocation allocation;
     vk::Buffer buffer{};
+    vk::DeviceAddress bda_addr = 0;
 };
 
 class Buffer {
@@ -115,6 +116,11 @@ public:
         return buffer;
     }
 
+    vk::DeviceAddress BufferDeviceAddress() const noexcept {
+        ASSERT_MSG(buffer.bda_addr != 0, "Can't get BDA from a non BDA buffer");
+        return buffer.bda_addr;
+    }
+
     std::optional<vk::BufferMemoryBarrier2> GetBarrier(
         vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
         u32 offset = 0) {
@@ -27,6 +27,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
       bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE},
+      fault_readback_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_READBACK_SIZE),
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
 
@@ -323,36 +324,36 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
     return CreateBuffer(device_addr, size);
 }
 
-void BufferCache::QueueCoverage(VAddr device_addr, u64 size) {
+void BufferCache::QueueMemoryImport(VAddr device_addr, u64 size) {
     std::scoped_lock lk{mutex};
     const u64 start = device_addr;
     const u64 end = device_addr + size;
-    auto queue_range = decltype(covered_regions)::interval_type::right_open(start, end);
-    queued_coverage += queue_range;
+    auto queue_range = decltype(imported_regions)::interval_type::right_open(start, end);
+    queued_imports += queue_range;
 }
 
-void BufferCache::CoverQueuedRegions() {
+void BufferCache::ImportQueuedRegions() {
     std::scoped_lock lk{mutex};
-    if (queued_coverage.empty()) {
+    if (queued_imports.empty()) {
         return;
     }
-    for (const auto& range : queued_coverage) {
-        CoverMemory(range.lower(), range.upper());
+    for (const auto& range : queued_imports) {
+        ImportMemory(range.lower(), range.upper());
     }
-    queued_coverage.clear();
+    queued_imports.clear();
 }
 
-void BufferCache::CoverMemory(u64 start, u64 end) {
+void BufferCache::ImportMemory(u64 start, u64 end) {
     const u64 page_start = start >> CACHING_PAGEBITS;
     const u64 page_end = Common::DivCeil(end, CACHING_PAGESIZE);
-    auto interval = decltype(covered_regions)::interval_type::right_open(page_start, page_end);
+    auto interval = decltype(imported_regions)::interval_type::right_open(page_start, page_end);
     auto interval_set = boost::icl::interval_set<u64>{interval};
-    auto uncovered_ranges = interval_set - covered_regions;
+    auto uncovered_ranges = interval_set - imported_regions;
     if (uncovered_ranges.empty()) {
         return;
     }
     // We fill any holes within the given range
-    boost::container::small_vector<u64, 1024> bda_addrs;
+    boost::container::small_vector<vk::DeviceAddress, 128> bda_addrs;
     for (const auto& range : uncovered_ranges) {
         // import host memory
         const u64 range_start = range.lower();
@@ -371,19 +372,19 @@ void BufferCache::CoverMemory(u64 start, u64 end) {
         bda_addrs.clear();
         bda_addrs.reserve(range_pages);
         for (u64 i = 0; i < range_pages; ++i) {
-            // TODO: we may want to mark the page as host imported
-            // to let the shader know so that it can notify us if it
-            // accesses the page, so we can create a GPU local buffer.
+            // Mark the page as host imported to let the shader know
+            // so that it can notify us if it accesses the page, so we can
+            // create a GPU local buffer.
             bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1);
         }
-        WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
-                        bda_addrs.size() * sizeof(u64));
+        WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress), bda_addrs.data(),
+                        bda_addrs.size() * sizeof(vk::DeviceAddress));
         {
             std::scoped_lock lk{mutex};
             imported_buffers.emplace_back(std::move(buffer));
         }
         // Mark the pages as covered
-        covered_regions += range;
+        imported_regions += range;
     }
 }
 
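To spell out the page-table convention used above: every CACHING_PAGESIZE page of guest memory owns one vk::DeviceAddress slot in bda_pagetable_buffer, and bit 0 of a slot marks the page as backed by imported host memory (the CreateBuffer hunk below writes GPU-local entries with that bit clear). A small illustrative sketch of the encoding; the helper names here are not from the codebase:

    constexpr u64 HOST_IMPORTED_BIT = 0x1;

    // Index of the page-table slot covering a guest address.
    u64 PageIndex(VAddr guest_addr) {
        return guest_addr >> CACHING_PAGEBITS;
    }

    // Slot value for page i of an imported host range whose base device address is base_bda.
    vk::DeviceAddress ImportedEntry(vk::DeviceAddress base_bda, u64 i) {
        return (base_bda + (i << CACHING_PAGEBITS)) | HOST_IMPORTED_BIT;
    }

    // A shader-side consumer would split the slot back apart before dereferencing:
    //   faulted    = (entry & HOST_IMPORTED_BIT) != 0
    //   device_ptr = entry & ~HOST_IMPORTED_BIT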
@@ -525,9 +526,25 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     const BufferId new_buffer_id = [&] {
         std::scoped_lock lk{mutex};
         return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
-                                   AllFlags, size);
+                                   AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
     }();
     auto& new_buffer = slot_buffers[new_buffer_id];
+    boost::container::small_vector<vk::DeviceAddress, 128> bda_addrs;
+    const u64 start_page = overlap.begin >> CACHING_PAGEBITS;
+    const u64 size_pages = size >> CACHING_PAGEBITS;
+    bda_addrs.reserve(size_pages);
+    for (u64 i = 0; i < size_pages; ++i) {
+        // Here, we do not set the host imported bit.
+        bda_addrs.push_back(new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS));
+    }
+    WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
+                    bda_addrs.size() * sizeof(vk::DeviceAddress));
+    {
+        // Mark the pages as covered
+        std::scoped_lock lk{mutex};
+        imported_regions += boost::icl::interval_set<u64>::interval_type::right_open(
+            start_page, start_page + size_pages);
+    }
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
     scheduler.EndRendering();
@@ -539,6 +556,44 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     return new_buffer_id;
 }
 
+void BufferCache::CreateFaultBuffers() {
+    // Download the fault readback buffer
+    const auto [mapped, offset] = staging_buffer.Map(FAULT_READBACK_SIZE);
+    vk::BufferCopy copy = {
+        .srcOffset = 0,
+        .dstOffset = offset,
+        .size = FAULT_READBACK_SIZE,
+    };
+    staging_buffer.Commit();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.copyBuffer(fault_readback_buffer.buffer, staging_buffer.Handle(), copy);
+    scheduler.Finish();
+    std::array<u8, FAULT_READBACK_SIZE> buffer{};
+    std::memcpy(buffer.data(), mapped, FAULT_READBACK_SIZE);
+    // Reset the fault readback buffer
+    cmdbuf.fillBuffer(fault_readback_buffer.buffer, 0, FAULT_READBACK_SIZE, 0);
+    // Create the fault buffers batched
+    boost::icl::interval_set<VAddr> fault_ranges;
+    for (u64 i = 0; i < FAULT_READBACK_SIZE / sizeof(vk::DeviceAddress); ++i) {
+        if (buffer[i] != 0) {
+            // Each byte contains information for 8 pages.
+            // We are going to create an aligned buffer of
+            // 8 * 64 KB = 512 KB around the fault address.
+            const VAddr fault_addr = buffer[i] << CACHING_PAGEBITS;
+            const u32 fault_end = mapped[i + 1] << CACHING_PAGEBITS;
+            auto range = decltype(fault_ranges)::interval_type::right_open(
+                fault_addr, fault_end);
+            fault_ranges += range;
+        }
+    }
+    for (const auto& range : fault_ranges) {
+        const VAddr start = range.lower();
+        const u32 size = range.upper() - start;
+        CreateBuffer(start, size);
+    }
+}
+
 void BufferCache::Register(BufferId buffer_id) {
     ChangeRegister<true>(buffer_id);
 }
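Since FAULT_READBACK_SIZE is defined further down as one bit per cached page, each non-zero byte of the downloaded copy covers eight consecutive pages. A hedged sketch of how such a bitmap would decode into faulting guest page addresses, reusing the project's u8/u32/u64/VAddr aliases and CACHING_PAGEBITS; this is illustrative only and is not the loop the commit actually ships:

    #include <span>
    #include <vector>

    std::vector<VAddr> DecodeFaultBitmap(std::span<const u8> bitmap) {
        std::vector<VAddr> pages;
        for (u64 byte = 0; byte < bitmap.size(); ++byte) {
            if (bitmap[byte] == 0) {
                continue; // no faulted pages in this group of eight
            }
            for (u32 bit = 0; bit < 8; ++bit) {
                if (bitmap[byte] & (1u << bit)) {
                    const u64 page = byte * 8 + bit;
                    pages.push_back(VAddr(page << CACHING_PAGEBITS)); // page index -> guest address
                }
            }
        }
        return pages;
    }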
@@ -740,6 +795,18 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     return true;
 }
 
+void BufferCache::SynchronizeRange(VAddr device_addr, u32 size) {
+    if (device_addr == 0) {
+        return;
+    }
+    ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+        if (buffer.is_deleted) {
+            return;
+        }
+        SynchronizeBuffer(buffer, buffer.CpuAddr(), buffer.SizeBytes(), false);
+    });
+}
+
 void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
@@ -43,7 +43,9 @@ public:
     static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
     static constexpr u64 DEVICE_PAGESIZE = 64_KB;
     static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS);
-    static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(u64);
+    static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress);
+    static constexpr u64 FAULT_READBACK_SIZE = CACHING_NUMPAGES / 8; // Bit per page
 
     struct Traits {
         using Entry = BufferId;
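For scale, assuming CACHING_PAGEBITS is 16 (64 KiB pages, matching DEVICE_PAGESIZE and the "8 * 64 KB" comment in CreateFaultBuffers; the real constant is defined just above this hunk) and using the project's u64 alias, the two new sizes work out as follows:

    // 40-bit guest address space / 64 KiB pages = 2^24 cached pages.
    static_assert((u64{1} << (40 - 16)) == 16'777'216);
    // BDA_PAGETABLE_SIZE: one 8-byte vk::DeviceAddress per page -> 128 MiB.
    static_assert(u64{16'777'216} * 8 == u64{128} * 1024 * 1024);
    // FAULT_READBACK_SIZE: one bit per page -> 2 MiB.
    static_assert(u64{16'777'216} / 8 == u64{2} * 1024 * 1024);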
@@ -81,6 +83,11 @@ public:
         return bda_pagetable_buffer;
     }
 
+    /// Retrieves the fault readback buffer.
+    [[nodiscard]] Buffer& GetFaultReadbackBuffer() noexcept {
+        return fault_readback_buffer;
+    }
+
     /// Retrieves the buffer with the specified id.
     [[nodiscard]] Buffer& GetBuffer(BufferId id) {
         return slot_buffers[id];
@@ -123,10 +130,16 @@ public:
     [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
 
     /// Queue a region for coverage for DMA.
-    void QueueCoverage(VAddr device_addr, u64 size);
+    void QueueMemoryImport(VAddr device_addr, u64 size);
 
     /// Covers all queued regions.
-    void CoverQueuedRegions();
+    void ImportQueuedRegions();
+
+    /// Creates buffers for "faulted" shader accesses to host memory.
+    void CreateFaultBuffers();
+
+    /// Synchronizes all buffers in the specified range.
+    void SynchronizeRange(VAddr device_addr, u32 size);
 
 private:
     template <typename Func>
@@ -171,7 +184,7 @@ private:
 
     void DeleteBuffer(BufferId buffer_id);
 
-    void CoverMemory(u64 start, u64 end);
+    void ImportMemory(u64 start, u64 end);
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
@@ -183,8 +196,9 @@ private:
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
     Buffer bda_pagetable_buffer;
-    boost::icl::interval_set<VAddr> queued_coverage;
-    boost::icl::interval_set<u64> covered_regions;
+    Buffer fault_readback_buffer;
+    boost::icl::interval_set<VAddr> queued_imports;
+    boost::icl::interval_set<u64> imported_regions;
     std::vector<ImportedHostBuffer> imported_buffers;
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
@@ -946,7 +946,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
         mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
     }
     page_manager.OnGpuMap(addr, size);
-    buffer_cache.QueueCoverage(addr, size);
+    buffer_cache.QueueMemoryImport(addr, size);
 }
 
 void Rasterizer::UnmapMemory(VAddr addr, u64 size) {