diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK
index 87a8e8b13..4cf8f9468 160000
--- a/externals/MoltenVK/MoltenVK
+++ b/externals/MoltenVK/MoltenVK
@@ -1 +1 @@
-Subproject commit 87a8e8b13d4ad8835367fea1ebad1896d0460946
+Subproject commit 4cf8f94684c53e581eb9cc694dd3305d1f7d9959
diff --git a/externals/MoltenVK/SPIRV-Cross b/externals/MoltenVK/SPIRV-Cross
index 791877574..2275d0efc 160000
--- a/externals/MoltenVK/SPIRV-Cross
+++ b/externals/MoltenVK/SPIRV-Cross
@@ -1 +1 @@
-Subproject commit 7918775748c5e2f5c40d9918ce68825035b5a1e1
+Subproject commit 2275d0efc4f2fa46851035d9d3c67c105bc8b99e
diff --git a/externals/sirit b/externals/sirit
index 09a1416ab..3af59da1d 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c
+Subproject commit 3af59da1d2a6974bcecbae45d4e734570248f814
diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h
index d4ac51361..1fc4f454d 100644
--- a/src/common/slot_vector.h
+++ b/src/common/slot_vector.h
@@ -60,7 +60,7 @@ public:
     }

     template <typename... Args>
-    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
+    SlotId insert(Args&&... args) noexcept {
         const u32 index = FreeValueIndex();
         new (&values[index].object) T(std::forward<Args>(args)...);
         SetStorageBit(index);
diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
index 15ef746cd..e5468dfc9 100644
--- a/src/video_core/buffer_cache/buffer.cpp
+++ b/src/video_core/buffer_cache/buffer.cpp
@@ -112,6 +112,103 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }

+ImportedHostBuffer::ImportedHostBuffer(const Vulkan::Instance& instance_,
+                                       Vulkan::Scheduler& scheduler_, void* cpu_addr_,
+                                       u64 size_bytes_, vk::BufferUsageFlags flags)
+    : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_} {
+    ASSERT_MSG(size_bytes > 0, "Size must be greater than 0");
+    ASSERT_MSG(cpu_addr != nullptr, "CPU address must not be null");
+    const vk::DeviceSize alignment = instance->GetExternalHostMemoryHostAlignment();
+    ASSERT_MSG(reinterpret_cast<uintptr_t>(cpu_addr) % alignment == 0,
+               "CPU address {:#x} is not aligned to {:#x}", reinterpret_cast<uintptr_t>(cpu_addr),
+               alignment);
+    ASSERT_MSG(size_bytes % alignment == 0, "Size {:#x} is not aligned to {:#x}", size_bytes,
+               alignment);
+
+    const auto& mem_props = instance->GetMemoryProperties();
+    auto ptr_props_result = instance->GetDevice().getMemoryHostPointerPropertiesEXT(
+        vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, cpu_addr);
+    ASSERT_MSG(ptr_props_result.result == vk::Result::eSuccess,
+               "Failed getting host pointer properties with error {}",
+               vk::to_string(ptr_props_result.result));
+    auto ptr_props = ptr_props_result.value;
+    u32 memory_type_index = UINT32_MAX;
+    for (u32 i = 0; i < mem_props.memoryTypeCount; ++i) {
+        if ((ptr_props.memoryTypeBits & (1 << i)) != 0) {
+            if (mem_props.memoryTypes[i].propertyFlags &
+                (vk::MemoryPropertyFlagBits::eHostVisible |
+                 vk::MemoryPropertyFlagBits::eHostCoherent)) {
+                memory_type_index = i;
+                // We prefer host-cached memory types.
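+                // Keep scanning after recording the first usable type: only
+                // finding a host-cached type ends the search early.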
+                if (mem_props.memoryTypes[i].propertyFlags &
+                    vk::MemoryPropertyFlagBits::eHostCached) {
+                    break;
+                }
+            }
+        }
+    }
+    ASSERT_MSG(memory_type_index != UINT32_MAX,
+               "Failed to find a host visible memory type for the imported host buffer");
+
+    const bool with_bda = bool(flags & vk::BufferUsageFlagBits::eShaderDeviceAddress);
+    vk::ExternalMemoryBufferCreateInfo external_info{
+        .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+    };
+    vk::BufferCreateInfo buffer_ci{
+        .pNext = &external_info,
+        .size = size_bytes,
+        .usage = flags,
+    };
+    vk::ImportMemoryHostPointerInfoEXT import_info{
+        .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+        .pHostPointer = cpu_addr,
+    };
+    vk::MemoryAllocateFlagsInfo memory_flags_info{
+        .pNext = &import_info,
+        .flags = with_bda ? vk::MemoryAllocateFlagBits::eDeviceAddress : vk::MemoryAllocateFlags{},
+    };
+    vk::MemoryAllocateInfo alloc_ci{
+        .pNext = &memory_flags_info,
+        .allocationSize = size_bytes,
+        .memoryTypeIndex = memory_type_index,
+    };
+
+    auto buffer_result = instance->GetDevice().createBuffer(buffer_ci);
+    ASSERT_MSG(buffer_result.result == vk::Result::eSuccess,
+               "Failed creating imported host buffer with error {}",
+               vk::to_string(buffer_result.result));
+    buffer = buffer_result.value;
+
+    auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci);
+    if (device_memory_result.result != vk::Result::eSuccess) {
+        // May fail to import the host memory if it is backed by a file. (AMD on Linux)
+        instance->GetDevice().destroyBuffer(buffer);
+        buffer = VK_NULL_HANDLE;
+        has_failed = true;
+        return;
+    }
+    device_memory = device_memory_result.value;
+
+    auto result = instance->GetDevice().bindBufferMemory(buffer, device_memory, 0);
+    ASSERT_MSG(result == vk::Result::eSuccess, "Failed binding imported host buffer with error {}",
+               vk::to_string(result));
+
+    if (with_bda) {
+        vk::BufferDeviceAddressInfo bda_info{
+            .buffer = buffer,
+        };
+        bda_addr = instance->GetDevice().getBufferAddress(bda_info);
+        ASSERT_MSG(bda_addr != 0, "Failed getting buffer device address");
+    }
+}
+
+ImportedHostBuffer::~ImportedHostBuffer() {
+    if (!buffer) {
+        return;
+    }
+    const auto device = instance->GetDevice();
+    device.destroyBuffer(buffer);
+    device.freeMemory(device_memory);
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h
index 188b4b2ca..3842f20c4 100644
--- a/src/video_core/buffer_cache/buffer.h
+++ b/src/video_core/buffer_cache/buffer.h
@@ -156,6 +156,72 @@ public:
     vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
 };

+class ImportedHostBuffer {
+public:
+    ImportedHostBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
+                       void* cpu_addr_, u64 size_bytes_, vk::BufferUsageFlags flags);
+    ~ImportedHostBuffer();
+
+    ImportedHostBuffer& operator=(const ImportedHostBuffer&) = delete;
+    ImportedHostBuffer(const ImportedHostBuffer&) = delete;
+
+    ImportedHostBuffer(ImportedHostBuffer&& other)
+        : size_bytes{std::exchange(other.size_bytes, 0)},
+          cpu_addr{std::exchange(other.cpu_addr, nullptr)},
+          bda_addr{std::exchange(other.bda_addr, 0)},
+          instance{other.instance}, scheduler{other.scheduler},
+          buffer{std::exchange(other.buffer, VK_NULL_HANDLE)},
+          device_memory{std::exchange(other.device_memory, VK_NULL_HANDLE)},
+          has_failed{std::exchange(other.has_failed, false)} {}
+    ImportedHostBuffer& operator=(ImportedHostBuffer&& other) {
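+        // Note: any handles this instance already owns are overwritten without
+        // being released, so move-assignment should only target empty buffers.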
+        size_bytes = std::exchange(other.size_bytes, 0);
+        cpu_addr = std::exchange(other.cpu_addr, nullptr);
+        bda_addr = std::exchange(other.bda_addr, 0);
+        instance = other.instance;
+        scheduler = other.scheduler;
+        buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
+        device_memory = std::exchange(other.device_memory, VK_NULL_HANDLE);
+        has_failed = std::exchange(other.has_failed, false);
+        return *this;
+    }
+
+    /// Returns the base CPU address of the buffer
+    void* CpuAddr() const noexcept {
+        return cpu_addr;
+    }
+
+    /// Returns the handle to the Vulkan buffer
+    vk::Buffer Handle() const noexcept {
+        return buffer;
+    }
+
+    /// Returns the size of the buffer in bytes
+    size_t SizeBytes() const noexcept {
+        return size_bytes;
+    }
+
+    /// Returns true if the buffer failed to be created
+    bool HasFailed() const noexcept {
+        return has_failed;
+    }
+
+    /// Returns the Buffer Device Address of the buffer
+    vk::DeviceAddress BufferDeviceAddress() const noexcept {
+        ASSERT_MSG(bda_addr != 0, "Can't get BDA from a non-BDA buffer");
+        return bda_addr;
+    }
+
+private:
+    size_t size_bytes = 0;
+    void* cpu_addr = nullptr;
+    vk::DeviceAddress bda_addr = 0;
+    const Vulkan::Instance* instance;
+    Vulkan::Scheduler* scheduler;
+    vk::Buffer buffer;
+    vk::DeviceMemory device_memory;
+    bool has_failed = false;
+};
+
 class StreamBuffer : public Buffer {
 public:
     explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index cdf736a89..e4dc5c5e9 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -26,6 +26,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
+      bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, BDA_PAGETABLE_SIZE},
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");

@@ -206,44 +207,30 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
         memcpy(std::bit_cast<void*>(address), value, num_bytes);
         return;
     }
-    scheduler.EndRendering();
-    const Buffer* buffer = [&] {
+    Buffer* buffer = [&] {
         if (is_gds) {
             return &gds_buffer;
         }
         const BufferId buffer_id = FindBuffer(address, num_bytes);
         return &slot_buffers[buffer_id];
     }();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = buffer->Handle(),
-        .offset = buffer->Offset(address),
-        .size = num_bytes,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .buffer = buffer->Handle(),
-        .offset = buffer->Offset(address),
-        .size = num_bytes,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
+    InlineDataBuffer(*buffer, address, value, num_bytes);
+}
+
+void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
+    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
+    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
+        memcpy(std::bit_cast<void*>(address), value, num_bytes);
+        return;
+    }
+    Buffer* buffer = [&] {
+        if (is_gds) {
+            return &gds_buffer;
+        }
+        const BufferId buffer_id = FindBuffer(address, num_bytes);
+        return &slot_buffers[buffer_id];
+    }();
+    WriteDataBuffer(*buffer, address, value, num_bytes);
 }

 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
@@ -336,6 +323,69 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
     return CreateBuffer(device_addr, size);
 }

+void BufferCache::MapMemory(VAddr device_addr, u64 size) {
+    const u64 page_start = device_addr >> BDA_PAGEBITS;
+    const u64 page_end = Common::DivCeil(device_addr + size, BDA_PAGESIZE);
+    // We fill any holes within the given range
+    boost::container::small_vector<u64, 128> bda_addrs;
+    bool importing_failed = false;
+    u64 range_start = page_start;
+    u64 range_end = page_start;
+    const auto import_range = [&]() {
+        // Import the host memory
+        void* cpu_addr = reinterpret_cast<void*>(range_start << BDA_PAGEBITS);
+        const u64 range_size = (range_end - range_start) << BDA_PAGEBITS;
+        ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size,
+                                  vk::BufferUsageFlagBits::eShaderDeviceAddress |
+                                      vk::BufferUsageFlagBits::eStorageBuffer);
+        if (buffer.HasFailed()) {
+            importing_failed = true;
+            return;
+        }
+        // Update BDA page table
+        const u64 bda_addr = buffer.BufferDeviceAddress();
+        const u64 range = range_end - range_start;
+        bda_addrs.clear();
+        bda_addrs.reserve(range);
+        for (u64 i = 0; i < range; ++i) {
+            // TODO: we may want to mark the page as host imported
+            // to let the shader know so that it can notify us if it
+            // accesses the page, so we can create a GPU local buffer.
+            bda_addrs.push_back(bda_addr + (i << BDA_PAGEBITS));
+        }
+        WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
+                        bda_addrs.size() * sizeof(u64));
+        {
+            std::scoped_lock lk{mutex};
+            imported_buffers.emplace_back(std::move(buffer));
+        }
+    };
+    for (; range_end < page_end; ++range_end) {
+        if (!bda_mapped_pages.test(range_end)) {
+            continue;
+        }
+        if (range_start != range_end) {
+            import_range();
+            if (importing_failed) {
+                break;
+            }
+        }
+        range_start = range_end + 1;
+    }
+    if (!importing_failed && range_start != range_end) {
+        import_range();
+    }
+    // Mark the pages as mapped
+    for (u64 page = page_start; page < page_end; ++page) {
+        bda_mapped_pages.set(page);
+    }
+    if (!importing_failed) {
+        return;
+    }
+    // If we failed to import the memory, fall back to copying the whole map
+    // to GPU memory.
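+    // Note that the pages were still marked as mapped above, so the import is
+    // not retried; the regular buffer created below covers the range instead.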
+    LOG_INFO(Render_Vulkan,
+             "Failed to import host memory at {:#x} size {:#x}, falling back to copying",
+             device_addr, size);
+    CreateBuffer(device_addr, size);
+}
+
 BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
     static constexpr int STREAM_LEAP_THRESHOLD = 16;
     boost::container::small_vector<BufferId, 16> overlap_ids;
@@ -689,6 +739,99 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     return true;
 }

+void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
+                                   u32 num_bytes) {
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.updateBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
+void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
+    vk::BufferCopy copy = {
+        .srcOffset = 0,
+        .dstOffset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    vk::Buffer src_buffer = staging_buffer.Handle();
+    if (num_bytes < StagingBufferSize) {
+        const auto [staging, offset] = staging_buffer.Map(num_bytes);
+        std::memcpy(staging + offset, value, num_bytes);
+        copy.srcOffset = offset;
+        staging_buffer.Commit();
+    } else {
+        // For large one time transfers use a temporary host buffer.
+        // RenderDoc can lag quite a bit if the stream buffer is too large.
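+        // The temporary buffer is moved into a deferred operation below so it
+        // stays alive until the scheduler has executed the copy.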
+        Buffer temp_buffer{instance,
+                           scheduler,
+                           MemoryUsage::Upload,
+                           0,
+                           vk::BufferUsageFlagBits::eTransferSrc,
+                           num_bytes};
+        src_buffer = temp_buffer.Handle();
+        u8* const staging = temp_buffer.mapped_data.data();
+        std::memcpy(staging, value, num_bytes);
+        scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {});
+    }
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer.Handle(),
+        .offset = buffer.Offset(address),
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.copyBuffer(src_buffer, buffer.Handle(), copy);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
 void BufferCache::DeleteBuffer(BufferId buffer_id) {
     Buffer& buffer = slot_buffers[buffer_id];
     Unregister(buffer_id);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 71a6bed2a..dd22269aa 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -3,6 +3,7 @@

 #pragma once

+#include <bitset>
 #include <shared_mutex>
 #include <boost/container/small_vector.hpp>
 #include "common/div_ceil.h"
@@ -42,6 +43,11 @@ public:
     static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
     static constexpr u64 DEVICE_PAGESIZE = 4_KB;

+    static constexpr u64 BDA_PAGEBITS = 16;
+    static constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS;
+    static constexpr u64 BDA_NUMPAGES = u64{1} << (40 - BDA_PAGEBITS);
+    static constexpr u64 BDA_PAGETABLE_SIZE = BDA_NUMPAGES * sizeof(u64);
+
     struct Traits {
         using Entry = BufferId;
         static constexpr size_t AddressSpaceBits = 40;
@@ -73,6 +79,11 @@ public:
         return stream_buffer;
     }

+    /// Retrieves the device local BDA page table buffer.
+    [[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
+        return bda_pagetable_buffer;
+    }
+
     /// Retrieves the buffer with the specified id.
     [[nodiscard]] Buffer& GetBuffer(BufferId id) {
         return slot_buffers[id];
@@ -87,9 +98,12 @@ public:
     /// Bind host index buffer for the current draw.
     void BindIndexBuffer(u32 index_offset);

-    /// Writes a value to GPU buffer.
+    /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

+    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
+    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
+
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,
@@ -110,6 +124,8 @@ public:
     [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);

+    void MapMemory(VAddr device_addr, u64 size);
+
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -134,7 +150,7 @@ private:
     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);

-    [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
+    BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);

     void Register(BufferId buffer_id);

@@ -147,6 +163,10 @@ private:
     bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);

+    void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
+
+    void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
+
     void DeleteBuffer(BufferId buffer_id);

     const Vulkan::Instance& instance;
@@ -157,6 +177,9 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
+    Buffer bda_pagetable_buffer;
+    std::bitset<BDA_NUMPAGES> bda_mapped_pages;
+    std::vector<ImportedHostBuffer> imported_buffers;
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 99f225d79..56229481a 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -147,6 +147,7 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
     available_extensions = GetSupportedExtensions(physical_device);
     format_properties = GetFormatProperties(physical_device);
     properties = physical_device.getProperties();
+    memory_properties = physical_device.getMemoryProperties();
     CollectDeviceParameters();
     ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
                "Vulkan {}.{} is required, but only {}.{} is supported by device!",
@@ -216,10 +217,13 @@ bool Instance::CreateDevice() {
     const vk::StructureChain properties_chain = physical_device.getProperties2<
         vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties,
-        vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+        vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR,
+        vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
     vk11_props = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>();
     vk12_props = properties_chain.get<vk::PhysicalDeviceVulkan12Properties>();
     push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+    external_memory_host_props =
+        properties_chain.get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
     LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", vk11_props.subgroupSize);

     if (available_extensions.empty()) {
@@ -371,6 +375,7 @@ bool Instance::CreateDevice() {
         .separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
         .hostQueryReset = vk12_features.hostQueryReset,
         .timelineSemaphore = vk12_features.timelineSemaphore,
+        .bufferDeviceAddress = vk12_features.bufferDeviceAddress,
     },
     vk::PhysicalDeviceVulkan13Features{
         .robustImageAccess = vk13_features.robustImageAccess,
@@ -500,6 +505,7 @@ void Instance::CreateAllocator() {
     };

     const VmaAllocatorCreateInfo allocator_info = {
+        .flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT,
         .physicalDevice = physical_device,
         .device = *device,
         .pVulkanFunctions = &functions,
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 573473869..fa24f1464 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -286,6 +286,11 @@ public:
         return vk12_props;
     }

+    /// Returns the memory properties of the physical device.
+    const vk::PhysicalDeviceMemoryProperties& GetMemoryProperties() const noexcept {
+        return memory_properties;
+    }
+
     /// Returns true if shaders can declare the ClipDistance attribute
     bool IsShaderClipDistanceSupported() const {
         return features.shaderClipDistance;
@@ -308,6 +313,11 @@ public:
                properties.limits.framebufferStencilSampleCounts;
     }

+    /// Returns the minimum alignment for imported host memory.
+    vk::DeviceSize GetExternalHostMemoryHostAlignment() const {
+        return external_memory_host_props.minImportedHostPointerAlignment;
+    }
+
     /// Returns whether disabling primitive restart is supported.
     bool IsPrimitiveRestartDisableSupported() const {
         return driver_id != vk::DriverId::eMoltenvk;
@@ -335,9 +345,11 @@ private:
     vk::PhysicalDevice physical_device;
     vk::UniqueDevice device;
     vk::PhysicalDeviceProperties properties;
+    vk::PhysicalDeviceMemoryProperties memory_properties;
     vk::PhysicalDeviceVulkan11Properties vk11_props;
     vk::PhysicalDeviceVulkan12Properties vk12_props;
     vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
+    vk::PhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_props;
     vk::PhysicalDeviceFeatures features;
     vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
     vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4caa781b9..33ab54f1c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -946,6 +946,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
         mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
     }
     page_manager.OnGpuMap(addr, size);
+    buffer_cache.MapMemory(addr, size);
 }

 void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
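For reference, the page table written by `BufferCache::MapMemory` stores one 64-bit buffer device address per 64 KiB page of the 40-bit guest address space. A minimal host-side sketch of the translation this enables (the `LookupBda` helper is illustrative only and not part of this patch):

```cpp
#include <cstdint>

using u64 = std::uint64_t;

constexpr u64 BDA_PAGEBITS = 16; // 64 KiB pages, matching BufferCache::BDA_PAGEBITS
constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS;

// Hypothetical mirror of the lookup a shader would perform against
// bda_pagetable_buffer: each entry holds the device address of that page's
// imported backing, so translation is entry plus the offset within the page.
u64 LookupBda(const u64* page_table, u64 device_addr) {
    const u64 page = device_addr >> BDA_PAGEBITS;
    const u64 offset = device_addr & (BDA_PAGESIZE - 1);
    return page_table[page] + offset; // a zero entry means the page was never imported
}
```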