Import memory

Lander Gallastegi 2025-04-16 01:00:34 +02:00
parent 6c39bf229c
commit 52253b45fb
11 changed files with 387 additions and 39 deletions

@@ -1 +1 @@
-Subproject commit 87a8e8b13d4ad8835367fea1ebad1896d0460946
+Subproject commit 4cf8f94684c53e581eb9cc694dd3305d1f7d9959

@@ -1 +1 @@
-Subproject commit 7918775748c5e2f5c40d9918ce68825035b5a1e1
+Subproject commit 2275d0efc4f2fa46851035d9d3c67c105bc8b99e

externals/sirit vendored

@@ -1 +1 @@
-Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c
+Subproject commit 3af59da1d2a6974bcecbae45d4e734570248f814

View File

@@ -60,7 +60,7 @@ public:
}
template <typename... Args>
-[[nodiscard]] SlotId insert(Args&&... args) noexcept {
+SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);

View File

@@ -112,6 +112,103 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
ImportedHostBuffer::ImportedHostBuffer(const Vulkan::Instance& instance_,
Vulkan::Scheduler& scheduler_, void* cpu_addr_,
u64 size_bytes_, vk::BufferUsageFlags flags)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_} {
ASSERT_MSG(size_bytes > 0, "Size must be greater than 0");
ASSERT_MSG(cpu_addr != 0, "CPU address must not be null");
const vk::DeviceSize alignment = instance->GetExternalHostMemoryHostAlignment();
ASSERT_MSG(reinterpret_cast<u64>(cpu_addr) % alignment == 0,
"CPU address {:#x} is not aligned to {:#x}", cpu_addr, alignment);
ASSERT_MSG(size_bytes % alignment == 0, "Size {:#x} is not aligned to {:#x}", size_bytes,
alignment);
const auto& mem_props = instance->GetMemoryProperties();
auto ptr_props_result = instance->GetDevice().getMemoryHostPointerPropertiesEXT(
vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, cpu_addr);
ASSERT_MSG(ptr_props_result.result == vk::Result::eSuccess,
"Failed getting host pointer properties with error {}",
vk::to_string(ptr_props_result.result));
auto ptr_props = ptr_props_result.value;
u32 memory_type_index = UINT32_MAX;
for (u32 i = 0; i < mem_props.memoryTypeCount; ++i) {
if ((ptr_props.memoryTypeBits & (1 << i)) != 0) {
if (mem_props.memoryTypes[i].propertyFlags & (vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent)) {
memory_type_index = i;
// We prefer cache coherent memory types.
if (mem_props.memoryTypes[i].propertyFlags &
vk::MemoryPropertyFlagBits::eHostCached) {
break;
}
}
}
}
ASSERT_MSG(memory_type_index != UINT32_MAX,
"Failed to find a host visible memory type for the imported host buffer");
const bool with_bda = bool(flags & vk::BufferUsageFlagBits::eShaderDeviceAddress);
vk::ExternalMemoryBufferCreateInfo external_info{
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
};
vk::BufferCreateInfo buffer_ci{
.pNext = &external_info,
.size = size_bytes,
.usage = flags,
};
vk::ImportMemoryHostPointerInfoEXT import_info{
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
.pHostPointer = reinterpret_cast<void*>(cpu_addr),
};
vk::MemoryAllocateFlagsInfo memory_flags_info{
.pNext = &import_info,
.flags = with_bda ? vk::MemoryAllocateFlagBits::eDeviceAddress : vk::MemoryAllocateFlags{},
};
vk::MemoryAllocateInfo alloc_ci{
.pNext = &memory_flags_info,
.allocationSize = size_bytes,
.memoryTypeIndex = memory_type_index,
};
auto buffer_result = instance->GetDevice().createBuffer(buffer_ci);
ASSERT_MSG(buffer_result.result == vk::Result::eSuccess,
"Failed creating imported host buffer with error {}",
vk::to_string(buffer_result.result));
buffer = buffer_result.value;
auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci);
if (device_memory_result.result != vk::Result::eSuccess) {
// May fail to import the host memory if it is backed by a file. (AMD on Linux)
instance->GetDevice().destroyBuffer(buffer);
has_failed = true;
return;
}
device_memory = device_memory_result.value;
auto result = instance->GetDevice().bindBufferMemory(buffer, device_memory, 0);
ASSERT_MSG(result == vk::Result::eSuccess,
"Failed binding imported host buffer with error {}",
vk::to_string(result));
if (with_bda) {
vk::BufferDeviceAddressInfo bda_info{
.buffer = buffer,
};
bda_addr = instance->GetDevice().getBufferAddress(bda_info);
ASSERT_MSG(bda_addr != 0, "Failed getting buffer device address");
}
}
ImportedHostBuffer::~ImportedHostBuffer() {
if (!buffer) {
return;
}
const auto device = instance->GetDevice();
device.destroyBuffer(buffer);
device.freeMemory(device_memory);
}
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
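A minimal usage sketch of the new class (illustration only, not part of the commit; host_ptr and host_size are hypothetical and assumed to already satisfy GetExternalHostMemoryHostAlignment()):

ImportedHostBuffer host_buf(instance, scheduler, host_ptr, host_size,
                            vk::BufferUsageFlagBits::eShaderDeviceAddress |
                            vk::BufferUsageFlagBits::eStorageBuffer);
if (!host_buf.HasFailed()) {
    // Import succeeded: the guest allocation is now directly visible to the GPU,
    // and its device address can be published in the BDA page table.
    const vk::DeviceAddress bda = host_buf.BufferDeviceAddress();
}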

View File

@@ -156,6 +156,72 @@ public:
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
};
class ImportedHostBuffer {
public:
ImportedHostBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
void* cpu_addr_, u64 size_bytes_, vk::BufferUsageFlags flags);
~ImportedHostBuffer();
ImportedHostBuffer& operator=(const ImportedHostBuffer&) = delete;
ImportedHostBuffer(const ImportedHostBuffer&) = delete;
ImportedHostBuffer(ImportedHostBuffer&& other)
: size_bytes{std::exchange(other.size_bytes, 0)},
cpu_addr{std::exchange(other.cpu_addr, nullptr)},
bda_addr{std::exchange(other.bda_addr, 0)},
instance{other.instance}, scheduler{other.scheduler},
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)},
device_memory{std::exchange(other.device_memory, VK_NULL_HANDLE)},
has_failed{std::exchange(other.has_failed, false)} {}
ImportedHostBuffer& operator=(ImportedHostBuffer&& other) {
size_bytes = std::exchange(other.size_bytes, 0);
cpu_addr = std::exchange(other.cpu_addr, nullptr);
bda_addr = std::exchange(other.bda_addr, 0);
instance = other.instance;
scheduler = other.scheduler;
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
device_memory = std::exchange(other.device_memory, VK_NULL_HANDLE);
has_failed = std::exchange(other.has_failed, false);
return *this;
}
/// Returns the base CPU address of the buffer
void* CpuAddr() const noexcept {
return cpu_addr;
}
// Returns the handle to the Vulkan buffer
vk::Buffer Handle() const noexcept {
return buffer;
}
// Returns the size of the buffer in bytes
size_t SizeBytes() const noexcept {
return size_bytes;
}
// Returns true if the buffer failed to be created
bool HasFailed() const noexcept {
return has_failed;
}
// Returns the Buffer Device Address of the buffer
vk::DeviceAddress BufferDeviceAddress() const noexcept {
ASSERT_MSG(bda_addr != 0, "Can't get BDA from a non BDA buffer");
return bda_addr;
}
private:
size_t size_bytes = 0;
void* cpu_addr = 0;
vk::DeviceAddress bda_addr = 0;
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
vk::Buffer buffer;
vk::DeviceMemory device_memory;
bool has_failed = false;
};
class StreamBuffer : public Buffer {
public:
explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,

View File

@@ -26,6 +26,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE},
memory_tracker{&tracker} {
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
@@ -206,44 +207,30 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
memcpy(std::bit_cast<void*>(address), value, num_bytes);
return;
}
-scheduler.EndRendering();
-const Buffer* buffer = [&] {
+Buffer* buffer = [&] {
if (is_gds) {
return &gds_buffer;
}
const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id];
}();
-const auto cmdbuf = scheduler.CommandBuffer();
-const vk::BufferMemoryBarrier2 pre_barrier = {
-.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-.buffer = buffer->Handle(),
-.offset = buffer->Offset(address),
-.size = num_bytes,
-};
-const vk::BufferMemoryBarrier2 post_barrier = {
-.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-.buffer = buffer->Handle(),
-.offset = buffer->Offset(address),
-.size = num_bytes,
-};
-cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-.dependencyFlags = vk::DependencyFlagBits::eByRegion,
-.bufferMemoryBarrierCount = 1,
-.pBufferMemoryBarriers = &pre_barrier,
-});
-cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
-cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-.dependencyFlags = vk::DependencyFlagBits::eByRegion,
-.bufferMemoryBarrierCount = 1,
-.pBufferMemoryBarriers = &post_barrier,
-});
+InlineDataBuffer(*buffer, address, value, num_bytes);
+}
+
+void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
+ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
+if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
+memcpy(std::bit_cast<void*>(address), value, num_bytes);
+return;
+}
+Buffer* buffer = [&] {
+if (is_gds) {
+return &gds_buffer;
+}
+const BufferId buffer_id = FindBuffer(address, num_bytes);
+return &slot_buffers[buffer_id];
+}();
+WriteDataBuffer(*buffer, address, value, num_bytes);
}
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
@@ -336,6 +323,69 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
return CreateBuffer(device_addr, size);
}
void BufferCache::MapMemory(VAddr device_addr, u64 size) {
const u64 page_start = device_addr >> BDA_PAGEBITS;
const u64 page_end = Common::DivCeil(device_addr + size, BDA_PAGESIZE);
// We fill any holes within the given range
boost::container::small_vector<u64, 1024> bda_addrs;
bool importing_failed = false;
u64 range_start = page_start;
u64 range_end = page_start;
const auto import_range = [&]() {
// Import the host memory
void* cpu_addr = reinterpret_cast<void*>(range_start << BDA_PAGEBITS);
const u64 range_size = (range_end - range_start) << BDA_PAGEBITS;
ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size,
vk::BufferUsageFlagBits::eShaderDeviceAddress |
vk::BufferUsageFlagBits::eStorageBuffer);
if (buffer.HasFailed()) {
// Bail out before querying the BDA; a failed import has no valid device address.
importing_failed = true;
return;
}
// Update BDA page table
u64 bda_addr = buffer.BufferDeviceAddress();
u64 range = range_end - range_start;
bda_addrs.clear();
bda_addrs.reserve(range);
for (u64 i = 0; i < range; ++i) {
// TODO: we may want to mark the page as host imported
// to let the shader know so that it can notify us if it
// accesses the page, so we can create a GPU local buffer.
bda_addrs.push_back(bda_addr + (i << BDA_PAGEBITS));
}
WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
bda_addrs.size() * sizeof(u64));
{
std::scoped_lock lk{mutex};
imported_buffers.emplace_back(std::move(buffer));
}
};
for (; range_end < page_end; ++range_end) {
if (!bda_mapped_pages.test(range_end)) {
continue;
}
if (range_start != range_end) {
import_range();
if (importing_failed) {
break;
}
}
range_start = range_end + 1;
}
if (!importing_failed && range_start != range_end) {
import_range();
}
// Mark the pages as mapped
for (u64 page = page_start; page < page_end; ++page) {
bda_mapped_pages.set(page);
}
if (!importing_failed) {
return;
}
// If we failed to import the memory, fall back to copying the whole map
// to GPU memory.
LOG_INFO(Render_Vulkan, "Failed to import host memory at {:#x} size {:#x}, falling back to copying",
device_addr, size);
CreateBuffer(device_addr, size);
}
BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16;
boost::container::small_vector<BufferId, 16> overlap_ids;
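As an aside, a host-side sketch of the lookup that the page-table entries written by MapMemory enable (illustration only, not code from this commit; LookupDeviceAddress is a hypothetical helper):

// Each BDA_PAGESIZE (64 KiB) page of guest address space has one u64 entry
// holding the buffer device address of that page's start, so a guest VAddr
// resolves to a device address as follows.
u64 LookupDeviceAddress(std::span<const u64> bda_pagetable, VAddr addr) {
    const u64 entry = bda_pagetable[addr >> BDA_PAGEBITS]; // written by MapMemory
    return entry + (addr & (BDA_PAGESIZE - 1));            // offset within the page
}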
@@ -689,6 +739,99 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return true;
}
void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.updateBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
vk::BufferCopy copy = {
.srcOffset = 0,
.dstOffset = buffer.Offset(address),
.size = num_bytes,
};
vk::Buffer src_buffer = staging_buffer.Handle();
if (num_bytes < StagingBufferSize) {
const auto [staging, offset] = staging_buffer.Map(num_bytes);
std::memcpy(staging + offset, value, num_bytes);
copy.srcOffset = offset;
staging_buffer.Commit();
} else {
// For large one time transfers use a temporary host buffer.
// RenderDoc can lag quite a bit if the stream buffer is too large.
Buffer temp_buffer{instance,
scheduler,
MemoryUsage::Upload,
0,
vk::BufferUsageFlagBits::eTransferSrc,
num_bytes};
src_buffer = temp_buffer.Handle();
u8* const staging = temp_buffer.mapped_data.data();
std::memcpy(staging, value, num_bytes);
scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {});
}
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.copyBuffer(src_buffer, buffer.Handle(), copy);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
void BufferCache::DeleteBuffer(BufferId buffer_id) {
Buffer& buffer = slot_buffers[buffer_id];
Unregister(buffer_id);
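A note on why both write paths exist (my reading, not stated in the commit): vkCmdUpdateBuffer, which InlineDataBuffer relies on, is limited by Vulkan to 65536 bytes and 4-byte granularity, while WriteDataBuffer stages the data and copies it, so it can take arbitrarily large writes such as the page-table updates issued by MapMemory. A hypothetical dispatcher, just to illustrate the split:

// Hypothetical helper (not in the commit): route a write through the path that can take it.
void BufferCache::WriteAuto(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
    if (num_bytes <= 65536 && num_bytes % 4 == 0) {
        InlineDataBuffer(buffer, address, value, num_bytes); // vkCmdUpdateBuffer path
    } else {
        WriteDataBuffer(buffer, address, value, num_bytes);  // staging-copy path
    }
}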

View File

@@ -3,6 +3,7 @@
#pragma once
#include <bitset>
#include <shared_mutex>
#include <boost/container/small_vector.hpp>
#include "common/div_ceil.h"
@@ -42,6 +43,11 @@ public:
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
static constexpr u64 DEVICE_PAGESIZE = 4_KB;
static constexpr u64 BDA_PAGEBITS = 16;
static constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS;
static constexpr u64 BDA_NUMPAGES = (u64{1} << (u64(40) - BDA_PAGEBITS));
static constexpr u64 BDA_PAGETABLE_SIZE = BDA_NUMPAGES * sizeof(u64);
struct Traits {
using Entry = BufferId;
static constexpr size_t AddressSpaceBits = 40;
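A quick check of what the new BDA constants imply (an aside, not part of the commit): a 40-bit guest address space split into 64 KiB pages yields 2^24 entries, and at 8 bytes per entry the device-local page table buffer is 128 MiB.

static_assert(BDA_NUMPAGES == (u64{1} << 24));              // 2^40 / 2^16 pages
static_assert(BDA_PAGETABLE_SIZE == 128ULL * 1024 * 1024);  // 16 Mi entries * 8 bytes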
@@ -73,6 +79,11 @@ public:
return stream_buffer;
}
/// Retrieves the device local BDA page table buffer.
[[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
return bda_pagetable_buffer;
}
/// Retrieves the buffer with the specified id.
[[nodiscard]] Buffer& GetBuffer(BufferId id) {
return slot_buffers[id];
@@ -87,9 +98,12 @@ public:
/// Bind host index buffer for the current draw.
void BindIndexBuffer(u32 index_offset);
-/// Writes a value to GPU buffer.
+/// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
/// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
/// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
bool is_texel_buffer = false,
@@ -110,6 +124,8 @@ public:
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
void MapMemory(VAddr device_addr, u64 size);
private:
template <typename Func>
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -134,7 +150,7 @@ private:
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
-[[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
+BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
void Register(BufferId buffer_id);
@@ -147,6 +163,10 @@ private:
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
void DeleteBuffer(BufferId buffer_id);
const Vulkan::Instance& instance;
@@ -157,6 +177,9 @@ private:
StreamBuffer staging_buffer;
StreamBuffer stream_buffer;
Buffer gds_buffer;
Buffer bda_pagetable_buffer;
std::bitset<BDA_NUMPAGES> bda_mapped_pages;
std::vector<ImportedHostBuffer> imported_buffers;
std::shared_mutex mutex;
Common::SlotVector<Buffer> slot_buffers;
RangeSet gpu_modified_ranges;

View File

@@ -147,6 +147,7 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
available_extensions = GetSupportedExtensions(physical_device);
format_properties = GetFormatProperties(physical_device);
properties = physical_device.getProperties();
memory_properties = physical_device.getMemoryProperties();
CollectDeviceParameters();
ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
@@ -216,10 +217,13 @@ bool Instance::CreateDevice() {
const vk::StructureChain properties_chain = physical_device.getProperties2<
vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties,
-vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR,
+vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
vk11_props = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>();
vk12_props = properties_chain.get<vk::PhysicalDeviceVulkan12Properties>();
push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
external_memory_host_props =
properties_chain.get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", vk11_props.subgroupSize);
if (available_extensions.empty()) {
@@ -371,6 +375,7 @@ bool Instance::CreateDevice() {
.separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
.hostQueryReset = vk12_features.hostQueryReset,
.timelineSemaphore = vk12_features.timelineSemaphore,
.bufferDeviceAddress = vk12_features.bufferDeviceAddress,
},
vk::PhysicalDeviceVulkan13Features{
.robustImageAccess = vk13_features.robustImageAccess,
@@ -500,6 +505,7 @@ void Instance::CreateAllocator() {
};
const VmaAllocatorCreateInfo allocator_info = {
.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT,
.physicalDevice = physical_device,
.device = *device,
.pVulkanFunctions = &functions,

View File

@@ -286,6 +286,11 @@ public:
return vk12_props;
}
/// Returns the memory properties of the physical device.
const vk::PhysicalDeviceMemoryProperties& GetMemoryProperties() const noexcept {
return memory_properties;
}
/// Returns true if shaders can declare the ClipDistance attribute
bool IsShaderClipDistanceSupported() const {
return features.shaderClipDistance;
@@ -308,6 +313,11 @@ public:
properties.limits.framebufferStencilSampleCounts;
}
/// Returns the minimum alignment for imported host memory.
vk::DeviceSize GetExternalHostMemoryHostAlignment() const {
return external_memory_host_props.minImportedHostPointerAlignment;
}
/// Returns whether disabling primitive restart is supported.
bool IsPrimitiveRestartDisableSupported() const {
return driver_id != vk::DriverId::eMoltenvk;
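The alignment getter added above feeds the asserts in the ImportedHostBuffer constructor. A sketch (hypothetical helper, not part of the commit) of rounding an arbitrary host range out to that alignment before importing it; minImportedHostPointerAlignment is device-dependent, commonly 4 KiB:

std::pair<void*, u64> AlignForImport(const Vulkan::Instance& instance, void* ptr, u64 size) {
    const u64 align = instance.GetExternalHostMemoryHostAlignment();
    const u64 addr = reinterpret_cast<u64>(ptr);
    const u64 begin = addr & ~(align - 1);                    // round the start down
    const u64 end = (addr + size + align - 1) & ~(align - 1); // round the end up
    return {reinterpret_cast<void*>(begin), end - begin};
}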
@@ -335,9 +345,11 @@ private:
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;
vk::PhysicalDeviceProperties properties;
vk::PhysicalDeviceMemoryProperties memory_properties;
vk::PhysicalDeviceVulkan11Properties vk11_props;
vk::PhysicalDeviceVulkan12Properties vk12_props;
vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_props;
vk::PhysicalDeviceFeatures features;
vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;

View File

@@ -946,6 +946,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
}
page_manager.OnGpuMap(addr, size);
buffer_cache.MapMemory(addr, size);
}
void Rasterizer::UnmapMemory(VAddr addr, u64 size) {