Import memory
Commit 52253b45fb (parent 6c39bf229c)
externals/MoltenVK/MoltenVK (vendored)
@@ -1 +1 @@
-Subproject commit 87a8e8b13d4ad8835367fea1ebad1896d0460946
+Subproject commit 4cf8f94684c53e581eb9cc694dd3305d1f7d9959
externals/MoltenVK/SPIRV-Cross (vendored)
@@ -1 +1 @@
-Subproject commit 7918775748c5e2f5c40d9918ce68825035b5a1e1
+Subproject commit 2275d0efc4f2fa46851035d9d3c67c105bc8b99e
externals/sirit (vendored)
@@ -1 +1 @@
-Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c
+Subproject commit 3af59da1d2a6974bcecbae45d4e734570248f814
@@ -60,7 +60,7 @@ public:
     }
 
     template <typename... Args>
-    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
+    SlotId insert(Args&&... args) noexcept {
         const u32 index = FreeValueIndex();
         new (&values[index].object) T(std::forward<Args>(args)...);
         SetStorageBit(index);
@@ -112,6 +112,103 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
+ImportedHostBuffer::ImportedHostBuffer(const Vulkan::Instance& instance_,
+                                       Vulkan::Scheduler& scheduler_, void* cpu_addr_,
+                                       u64 size_bytes_, vk::BufferUsageFlags flags)
+    : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_} {
+    ASSERT_MSG(size_bytes > 0, "Size must be greater than 0");
+    ASSERT_MSG(cpu_addr != 0, "CPU address must not be null");
+    const vk::DeviceSize alignment = instance->GetExternalHostMemoryHostAlignment();
+    ASSERT_MSG(reinterpret_cast<u64>(cpu_addr) % alignment == 0,
+               "CPU address {:#x} is not aligned to {:#x}", cpu_addr, alignment);
+    ASSERT_MSG(size_bytes % alignment == 0, "Size {:#x} is not aligned to {:#x}", size_bytes,
+               alignment);
+
+    const auto& mem_props = instance->GetMemoryProperties();
+    auto ptr_props_result = instance->GetDevice().getMemoryHostPointerPropertiesEXT(
+        vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, cpu_addr);
+    ASSERT_MSG(ptr_props_result.result == vk::Result::eSuccess,
+               "Failed getting host pointer properties with error {}",
+               vk::to_string(ptr_props_result.result));
+    auto ptr_props = ptr_props_result.value;
+    u32 memory_type_index = UINT32_MAX;
+    for (u32 i = 0; i < mem_props.memoryTypeCount; ++i) {
+        if ((ptr_props.memoryTypeBits & (1 << i)) != 0) {
+            if (mem_props.memoryTypes[i].propertyFlags & (vk::MemoryPropertyFlagBits::eHostVisible |
+                                                          vk::MemoryPropertyFlagBits::eHostCoherent)) {
+                memory_type_index = i;
+                // We prefer cache coherent memory types.
+                if (mem_props.memoryTypes[i].propertyFlags &
+                    vk::MemoryPropertyFlagBits::eHostCached) {
+                    break;
+                }
+            }
+        }
+    }
+    ASSERT_MSG(memory_type_index != UINT32_MAX,
+               "Failed to find a host visible memory type for the imported host buffer");
+
+    const bool with_bda = bool(flags & vk::BufferUsageFlagBits::eShaderDeviceAddress);
+    vk::ExternalMemoryBufferCreateInfo external_info{
+        .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+    };
+    vk::BufferCreateInfo buffer_ci{
+        .pNext = &external_info,
+        .size = size_bytes,
+        .usage = flags,
+    };
+    vk::ImportMemoryHostPointerInfoEXT import_info{
+        .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+        .pHostPointer = reinterpret_cast<void*>(cpu_addr),
+    };
+    vk::MemoryAllocateFlagsInfo memory_flags_info{
+        .pNext = &import_info,
+        .flags = with_bda ? vk::MemoryAllocateFlagBits::eDeviceAddress : vk::MemoryAllocateFlags{},
+    };
+    vk::MemoryAllocateInfo alloc_ci{
+        .pNext = &memory_flags_info,
+        .allocationSize = size_bytes,
+        .memoryTypeIndex = memory_type_index,
+    };
+
+    auto buffer_result = instance->GetDevice().createBuffer(buffer_ci);
+    ASSERT_MSG(buffer_result.result == vk::Result::eSuccess,
+               "Failed creating imported host buffer with error {}",
+               vk::to_string(buffer_result.result));
+    buffer = buffer_result.value;
+
+    auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci);
+    if (device_memory_result.result != vk::Result::eSuccess) {
+        // May fail to import the host memory if it is backed by a file. (AMD on Linux)
+        instance->GetDevice().destroyBuffer(buffer);
+        has_failed = true;
+        return;
+    }
+    device_memory = device_memory_result.value;
+
+    auto result = instance->GetDevice().bindBufferMemory(buffer, device_memory, 0);
+    ASSERT_MSG(result == vk::Result::eSuccess,
+               "Failed binding imported host buffer with error {}",
+               vk::to_string(result));
+
+    if (with_bda) {
+        vk::BufferDeviceAddressInfo bda_info{
+            .buffer = buffer,
+        };
+        bda_addr = instance->GetDevice().getBufferAddress(bda_info);
+        ASSERT_MSG(bda_addr != 0, "Failed getting buffer device address");
+    }
+}
+
+ImportedHostBuffer::~ImportedHostBuffer() {
+    if (!buffer) {
+        return;
+    }
+    const auto device = instance->GetDevice();
+    device.destroyBuffer(buffer);
+    device.freeMemory(device_memory);
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
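Note on the hunk above: the constructor asserts that both the host pointer and the size are multiples of minImportedHostPointerAlignment, so callers must pre-align the range they import. A minimal standalone sketch of that pre-alignment step follows; AlignForImport is a hypothetical helper, not part of the commit, and 4 KiB is only the value typically reported by desktop drivers (the buffer cache itself works in 64 KiB BDA pages, which already satisfy it).

// Hypothetical helper, not from the commit: expand an arbitrary host range so that both its
// base and size are multiples of the VK_EXT_external_memory_host import alignment
// (Instance::GetExternalHostMemoryHostAlignment(), i.e. minImportedHostPointerAlignment).
#include <cstdint>

struct AlignedRange {
    std::uintptr_t base; // rounded-down start address
    std::uint64_t size;  // rounded-up size covering the original range
};

constexpr AlignedRange AlignForImport(std::uintptr_t addr, std::uint64_t size,
                                      std::uint64_t alignment) {
    const std::uintptr_t base = addr & ~(alignment - 1);
    const std::uintptr_t end = (addr + size + alignment - 1) & ~(alignment - 1);
    return {base, end - base};
}

// Example: a 3-byte range at 0x1234 becomes exactly one 4 KiB page.
static_assert(AlignForImport(0x1234, 3, 0x1000).base == 0x1000);
static_assert(AlignForImport(0x1234, 3, 0x1000).size == 0x1000);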
@@ -156,6 +156,72 @@ public:
     vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
 };
 
+class ImportedHostBuffer {
+public:
+    ImportedHostBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
+                       void* cpu_addr_, u64 size_bytes_, vk::BufferUsageFlags flags);
+    ~ImportedHostBuffer();
+
+    ImportedHostBuffer& operator=(const ImportedHostBuffer&) = delete;
+    ImportedHostBuffer(const ImportedHostBuffer&) = delete;
+
+    ImportedHostBuffer(ImportedHostBuffer&& other)
+        : size_bytes{std::exchange(other.size_bytes, 0)},
+          cpu_addr{std::exchange(other.cpu_addr, nullptr)},
+          bda_addr{std::exchange(other.bda_addr, 0)},
+          instance{other.instance}, scheduler{other.scheduler},
+          buffer{std::exchange(other.buffer, VK_NULL_HANDLE)},
+          device_memory{std::exchange(other.device_memory, VK_NULL_HANDLE)},
+          has_failed{std::exchange(other.has_failed, false)} {}
+    ImportedHostBuffer& operator=(ImportedHostBuffer&& other) {
+        size_bytes = std::exchange(other.size_bytes, 0);
+        cpu_addr = std::exchange(other.cpu_addr, nullptr);
+        bda_addr = std::exchange(other.bda_addr, false);
+        instance = other.instance;
+        scheduler = other.scheduler;
+        buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
+        device_memory = std::exchange(other.device_memory, VK_NULL_HANDLE);
+        has_failed = std::exchange(other.has_failed, false);
+        return *this;
+    }
+
+    /// Returns the base CPU address of the buffer
+    void* CpuAddr() const noexcept {
+        return cpu_addr;
+    }
+
+    // Returns the handle to the Vulkan buffer
+    vk::Buffer Handle() const noexcept {
+        return buffer;
+    }
+
+    // Returns the size of the buffer in bytes
+    size_t SizeBytes() const noexcept {
+        return size_bytes;
+    }
+
+    // Returns true if the buffer failed to be created
+    bool HasFailed() const noexcept {
+        return has_failed;
+    }
+
+    // Returns the Buffer Device Address of the buffer
+    vk::DeviceAddress BufferDeviceAddress() const noexcept {
+        ASSERT_MSG(bda_addr != 0, "Can't get BDA from a non BDA buffer");
+        return bda_addr;
+    }
+
+private:
+    size_t size_bytes = 0;
+    void* cpu_addr = 0;
+    vk::DeviceAddress bda_addr = 0;
+    const Vulkan::Instance* instance;
+    Vulkan::Scheduler* scheduler;
+    vk::Buffer buffer;
+    vk::DeviceMemory device_memory;
+    bool has_failed = false;
+};
+
 class StreamBuffer : public Buffer {
 public:
     explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
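As a quick orientation for the class declared above, a hedged usage sketch follows. It is not part of the commit; ImportOneRange is a hypothetical free function, the Vulkan::Instance and Vulkan::Scheduler arguments stand in for the renderer's own objects, and the pointer/size are assumed to already satisfy the import alignment.

// Hypothetical usage sketch, not from the commit.
void ImportOneRange(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                    void* host_range, u64 range_size) {
    ImportedHostBuffer imported{instance, scheduler, host_range, range_size,
                                vk::BufferUsageFlagBits::eShaderDeviceAddress |
                                    vk::BufferUsageFlagBits::eStorageBuffer};
    if (imported.HasFailed()) {
        // Import can legitimately fail (e.g. file-backed memory on AMD/Linux);
        // BufferCache::MapMemory falls back to an ordinary device-local copy in that case.
        return;
    }
    // The device address is what MapMemory writes into the BDA page table,
    // one entry per 64 KiB page of the imported range.
    const vk::DeviceAddress gpu_addr = imported.BufferDeviceAddress();
    (void)gpu_addr;
}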
@@ -26,6 +26,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
+      bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE},
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
@@ -206,44 +207,30 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
         memcpy(std::bit_cast<void*>(address), value, num_bytes);
         return;
     }
-    scheduler.EndRendering();
-    const Buffer* buffer = [&] {
+    Buffer* buffer = [&] {
         if (is_gds) {
             return &gds_buffer;
         }
         const BufferId buffer_id = FindBuffer(address, num_bytes);
         return &slot_buffers[buffer_id];
     }();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = buffer->Handle(),
-        .offset = buffer->Offset(address),
-        .size = num_bytes,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .buffer = buffer->Handle(),
-        .offset = buffer->Offset(address),
-        .size = num_bytes,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
+    InlineDataBuffer(*buffer, address, value, num_bytes);
 }
 
+void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
+    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
+    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
+        memcpy(std::bit_cast<void*>(address), value, num_bytes);
+        return;
+    }
+    Buffer* buffer = [&] {
+        if (is_gds) {
+            return &gds_buffer;
+        }
+        const BufferId buffer_id = FindBuffer(address, num_bytes);
+        return &slot_buffers[buffer_id];
+    }();
+    WriteDataBuffer(*buffer, address, value, num_bytes);
+}
+
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
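The split above mirrors the two write paths: InlineData records the payload directly into the command buffer via vkCmdUpdateBuffer, which the Vulkan spec restricts to at most 65536 bytes and 4-byte multiples, while WriteData stages the data in a host buffer and issues a copy. A hypothetical dispatch sketch, not part of the commit, just to make the trade-off concrete:

// Hypothetical caller-side dispatch, not from the commit.
void WriteToGpu(BufferCache& cache, VAddr address, const void* value, u32 num_bytes, bool is_gds) {
    constexpr u32 kInlineLimit = 65536; // vkCmdUpdateBuffer's maximum dataSize
    if (num_bytes <= kInlineLimit && num_bytes % 4 == 0) {
        cache.InlineData(address, value, num_bytes, is_gds); // small write, inlined in the cmdbuf
    } else {
        cache.WriteData(address, value, num_bytes, is_gds);  // large write, staged copy
    }
}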
@@ -336,6 +323,69 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
     return CreateBuffer(device_addr, size);
 }
 
+void BufferCache::MapMemory(VAddr device_addr, u64 size) {
+    const u64 page_start = device_addr >> BDA_PAGEBITS;
+    const u64 page_end = Common::DivCeil(device_addr + size, BDA_PAGESIZE);
+    // We fill any holes within the given range
+    boost::container::small_vector<u64, 1024> bda_addrs;
+    bool importing_failed = false;
+    u64 range_start = page_start;
+    u64 range_end = page_start;
+    const auto import_range = [&]() {
+        // Import the host memory
+        void* cpu_addr = reinterpret_cast<void*>(range_start << BDA_PAGEBITS);
+        const u64 range_size = (range_end - range_start) << BDA_PAGEBITS;
+        ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size,
+                                  vk::BufferUsageFlagBits::eShaderDeviceAddress |
+                                      vk::BufferUsageFlagBits::eStorageBuffer);
+        if (buffer.HasFailed()) {
+            importing_failed = true;
+        }
+        // Update BDA page table
+        u64 bda_addr = buffer.BufferDeviceAddress();
+        u64 range = range_end - range_start;
+        bda_addrs.clear();
+        bda_addrs.reserve(range);
+        for (u64 i = 0; i < range; ++i) {
+            // TODO: we may want to mark the page as host imported
+            // to let the shader know so that it can notify us if it
+            // accesses the page, so we can create a GPU local buffer.
+            bda_addrs.push_back(bda_addr + (i << BDA_PAGEBITS));
+        }
+        WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
+                        bda_addrs.size() * sizeof(u64));
+        {
+            std::scoped_lock lk{mutex};
+            imported_buffers.emplace_back(std::move(buffer));
+        }
+    };
+    for (; range_end < page_end; ++range_end) {
+        if (!bda_mapped_pages.test(range_end)) {
+            continue;
+        }
+        if (range_start != range_end) {
+            import_range();
+            if (importing_failed) {
+                break;
+            }
+        }
+        range_start = range_end + 1;
+    }
+    if (!importing_failed && range_start != range_end) {
+        import_range();
+    }
+    // Mark the pages as mapped
+    for (u64 page = page_start; page < page_end; ++page) {
+        bda_mapped_pages.set(page);
+    }
+    if (!importing_failed) {
+        return;
+    }
+    // If we failed to import the memory, fall back to copying the whole map
+    // to GPU memory.
+    LOG_INFO(Render_Vulkan,
+             "Failed to import host memory at {:#x} size {:#x}, falling back to copying",
+             device_addr, size);
+    CreateBuffer(device_addr, size);
+}
+
 BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
     static constexpr int STREAM_LEAP_THRESHOLD = 16;
     boost::container::small_vector<BufferId, 16> overlap_ids;
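The range walk in MapMemory is easier to follow in isolation: it scans the page range, treats every page that is already set in bda_mapped_pages as a separator, and imports each maximal run of still-unmapped pages as one contiguous range. A standalone model of that walk (ignoring the failure handling) might look like the sketch below; CollectUnmappedRuns is purely illustrative and not part of the commit.

// Illustrative stand-in for the hole-filling walk in BufferCache::MapMemory.
// Returns half-open page runs [first, last) that are not yet marked as mapped.
#include <bitset>
#include <cstdint>
#include <utility>
#include <vector>

template <std::size_t N>
std::vector<std::pair<std::uint64_t, std::uint64_t>> CollectUnmappedRuns(
    const std::bitset<N>& mapped_pages, std::uint64_t page_start, std::uint64_t page_end) {
    std::vector<std::pair<std::uint64_t, std::uint64_t>> runs;
    std::uint64_t range_start = page_start;
    for (std::uint64_t page = page_start; page < page_end; ++page) {
        if (!mapped_pages.test(page)) {
            continue; // still inside a hole, keep extending the current run
        }
        if (range_start != page) {
            runs.emplace_back(range_start, page); // emit the run ending at this mapped page
        }
        range_start = page + 1; // skip over the already-mapped page
    }
    if (range_start != page_end) {
        runs.emplace_back(range_start, page_end); // trailing run, if any
    }
    return runs;
}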
@@ -689,6 +739,99 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     return true;
 }
 
+void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.updateBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
+void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
+    vk::BufferCopy copy = {
+        .srcOffset = 0,
+        .dstOffset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    vk::Buffer src_buffer = staging_buffer.Handle();
+    if (num_bytes < StagingBufferSize) {
+        const auto [staging, offset] = staging_buffer.Map(num_bytes);
+        std::memcpy(staging + offset, value, num_bytes);
+        copy.srcOffset = offset;
+        staging_buffer.Commit();
+    } else {
+        // For large one time transfers use a temporary host buffer.
+        // RenderDoc can lag quite a bit if the stream buffer is too large.
+        Buffer temp_buffer{instance,
+                           scheduler,
+                           MemoryUsage::Upload,
+                           0,
+                           vk::BufferUsageFlagBits::eTransferSrc,
+                           num_bytes};
+        src_buffer = temp_buffer.Handle();
+        u8* const staging = temp_buffer.mapped_data.data();
+        std::memcpy(staging, value, num_bytes);
+        scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {});
+    }
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.copyBuffer(src_buffer, buffer.Handle(), copy);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
 void BufferCache::DeleteBuffer(BufferId buffer_id) {
     Buffer& buffer = slot_buffers[buffer_id];
     Unregister(buffer_id);
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <bitset>
 #include <shared_mutex>
 #include <boost/container/small_vector.hpp>
 #include "common/div_ceil.h"
@@ -42,6 +43,11 @@ public:
     static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
     static constexpr u64 DEVICE_PAGESIZE = 4_KB;
 
+    static constexpr u64 BDA_PAGEBITS = 16;
+    static constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS;
+    static constexpr u64 BDA_NUMPAGES = (u64{1} << (u64(40) - BDA_PAGEBITS));
+    static constexpr u64 BDA_PAGETABLE_SIZE = BDA_NUMPAGES * sizeof(u64);
+
     struct Traits {
         using Entry = BufferId;
         static constexpr size_t AddressSpaceBits = 40;
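For scale, the new constants describe 64 KiB pages over the 40-bit guest address space, which puts the page table itself at 128 MiB of device-local memory. A standalone restatement of that arithmetic (not part of the commit; u64 here is just an alias for std::uint64_t, matching the project's usual typedef):

// Not from the commit: restating the arithmetic behind BDA_PAGEBITS and friends.
#include <cstdint>
using u64 = std::uint64_t;

constexpr u64 kPageBits = 16;                            // BDA_PAGEBITS
constexpr u64 kPageSize = u64{1} << kPageBits;           // 64 KiB per BDA page
constexpr u64 kNumPages = u64{1} << (40 - kPageBits);    // 2^24 pages cover 40 bits of VA
constexpr u64 kPageTableBytes = kNumPages * sizeof(u64); // one 64-bit device address per page

static_assert(kPageSize == 64 * 1024);
static_assert(kNumPages == 16'777'216);
static_assert(kPageTableBytes == 128ull * 1024 * 1024); // 128 MiB page table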
@@ -73,6 +79,11 @@ public:
         return stream_buffer;
     }
 
+    /// Retrieves the device local BDA page table buffer.
+    [[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
+        return bda_pagetable_buffer;
+    }
+
     /// Retrieves the buffer with the specified id.
     [[nodiscard]] Buffer& GetBuffer(BufferId id) {
         return slot_buffers[id];
@@ -87,9 +98,12 @@ public:
     /// Bind host index buffer for the current draw.
     void BindIndexBuffer(u32 index_offset);
 
-    /// Writes a value to GPU buffer.
+    /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
+    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
+    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
+
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,
@@ -110,6 +124,8 @@ public:
 
     [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
 
+    void MapMemory(VAddr device_addr, u64 size);
+
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -134,7 +150,7 @@ private:
 
     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
 
-    [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
+    BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
 
     void Register(BufferId buffer_id);
 
@@ -147,6 +163,10 @@ private:
 
     bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
 
+    void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
+
+    void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
+
     void DeleteBuffer(BufferId buffer_id);
 
     const Vulkan::Instance& instance;
@@ -157,6 +177,9 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
+    Buffer bda_pagetable_buffer;
+    std::bitset<BDA_NUMPAGES> bda_mapped_pages;
+    std::vector<ImportedHostBuffer> imported_buffers;
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
@@ -147,6 +147,7 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
     available_extensions = GetSupportedExtensions(physical_device);
     format_properties = GetFormatProperties(physical_device);
     properties = physical_device.getProperties();
+    memory_properties = physical_device.getMemoryProperties();
     CollectDeviceParameters();
     ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
                "Vulkan {}.{} is required, but only {}.{} is supported by device!",
@@ -216,10 +217,13 @@ bool Instance::CreateDevice() {
 
     const vk::StructureChain properties_chain = physical_device.getProperties2<
         vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties,
-        vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+        vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR,
+        vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
     vk11_props = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>();
     vk12_props = properties_chain.get<vk::PhysicalDeviceVulkan12Properties>();
     push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+    external_memory_host_props =
+        properties_chain.get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
     LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", vk11_props.subgroupSize);
 
     if (available_extensions.empty()) {
|
||||
.separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
|
||||
.hostQueryReset = vk12_features.hostQueryReset,
|
||||
.timelineSemaphore = vk12_features.timelineSemaphore,
|
||||
.bufferDeviceAddress = vk12_features.bufferDeviceAddress,
|
||||
},
|
||||
vk::PhysicalDeviceVulkan13Features{
|
||||
.robustImageAccess = vk13_features.robustImageAccess,
|
||||
@@ -500,6 +505,7 @@ void Instance::CreateAllocator() {
     };
 
     const VmaAllocatorCreateInfo allocator_info = {
+        .flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT,
         .physicalDevice = physical_device,
         .device = *device,
         .pVulkanFunctions = &functions,
@@ -286,6 +286,11 @@ public:
         return vk12_props;
     }
 
+    /// Returns the memory properties of the physical device.
+    const vk::PhysicalDeviceMemoryProperties& GetMemoryProperties() const noexcept {
+        return memory_properties;
+    }
+
     /// Returns true if shaders can declare the ClipDistance attribute
     bool IsShaderClipDistanceSupported() const {
         return features.shaderClipDistance;
@@ -308,6 +313,11 @@ public:
                properties.limits.framebufferStencilSampleCounts;
     }
 
+    /// Returns the minimum alignment for imported host memory.
+    vk::DeviceSize GetExternalHostMemoryHostAlignment() const {
+        return external_memory_host_props.minImportedHostPointerAlignment;
+    }
+
     /// Returns whether disabling primitive restart is supported.
     bool IsPrimitiveRestartDisableSupported() const {
         return driver_id != vk::DriverId::eMoltenvk;
@@ -335,9 +345,11 @@ private:
     vk::PhysicalDevice physical_device;
     vk::UniqueDevice device;
    vk::PhysicalDeviceProperties properties;
+    vk::PhysicalDeviceMemoryProperties memory_properties;
     vk::PhysicalDeviceVulkan11Properties vk11_props;
     vk::PhysicalDeviceVulkan12Properties vk12_props;
     vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
+    vk::PhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_props;
     vk::PhysicalDeviceFeatures features;
     vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
     vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
@@ -946,6 +946,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
         mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
     }
     page_manager.OnGpuMap(addr, size);
+    buffer_cache.MapMemory(addr, size);
 }
 
 void Rasterizer::UnmapMemory(VAddr addr, u64 size) {