From b1cecf6e878e4d72b9160ddf0013bbfc59f7824a Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 17 Oct 2024 23:46:06 +0300 Subject: [PATCH] renderer_vulkan: Commonize buffer binding --- .../libraries/kernel/thread_management.cpp | 9 + src/video_core/buffer_cache/buffer.h | 1 + src/video_core/buffer_cache/buffer_cache.cpp | 14 +- src/video_core/buffer_cache/buffer_cache.h | 12 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 160 ++++-------------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 144 +++------------- .../renderer_vulkan/vk_pipeline_common.cpp | 149 +++++++++++++++- .../renderer_vulkan/vk_pipeline_common.h | 9 + .../renderer_vulkan/vk_platform.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 14 +- src/video_core/texture_cache/sampler.cpp | 3 + .../texture_cache/texture_cache.cpp | 2 +- 12 files changed, 249 insertions(+), 270 deletions(-) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 39c0eaf80..a8cee07c7 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1075,7 +1075,16 @@ ScePthread PThreadPool::Create(const char* name) { } } +#ifdef _WIN64 auto* ret = new PthreadInternal{}; +#else + // TODO: Linux specific hack + static u8* hint_address = reinterpret_cast(0x7FFFFC000ULL); + auto* ret = reinterpret_cast( + mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); + hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB); +#endif ret->is_free = false; ret->is_detached = false; ret->is_almost_done = false; diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index 403d4ed85..f67278f64 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -142,6 +142,7 @@ public: VAddr cpu_addr = 0; bool is_picked{}; bool is_coherent{}; + bool is_deleted{}; int stream_score = 0; size_t size_bytes = 0; std::span mapped_data; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c2993f3d1..e4bdb5339 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -291,7 +291,7 @@ void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) { } std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, - bool is_texel_buffer) { + bool is_texel_buffer, BufferId buffer_id) { static constexpr u64 StreamThreshold = CACHING_PAGESIZE; const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); if (!is_written && size <= StreamThreshold && !is_gpu_dirty) { @@ -301,16 +301,19 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&stream_buffer, offset}; } - const BufferId buffer_id = FindBuffer(device_addr, size); + if (!buffer_id || slot_buffers[buffer_id].is_deleted) { + buffer_id = FindBuffer(device_addr, size); + } Buffer& buffer = slot_buffers[buffer_id]; SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); if (is_written) { memory_tracker.MarkRegionAsGpuModified(device_addr, size); + gpu_regions.Add(device_addr, size); } return {&buffer, buffer.Offset(device_addr)}; } -std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { +std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) { const u64 page = gpu_addr >> CACHING_PAGEBITS; const BufferId buffer_id = page_table[page]; if (buffer_id) { @@ -539,6 +542,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, largest_copy = std::max(largest_copy, range_size); }; memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { + bool has_gpu = false; + gpu_regions.ForEachInRange(device_addr_out, range_size, [&](VAddr, VAddr) { has_gpu = true; }); add_copy(device_addr_out, range_size); // Prevent uploading to gpu modified regions. // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy); @@ -656,12 +661,13 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { // Mark the whole buffer as CPU written to stop tracking CPU writes + Buffer& buffer = slot_buffers[buffer_id]; if (!do_not_mark) { - Buffer& buffer = slot_buffers[buffer_id]; memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); } Unregister(buffer_id); scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); }); + buffer.is_deleted = true; } } // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 76309363a..510f2525c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -12,6 +12,7 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" namespace AmdGpu { @@ -85,10 +86,10 @@ public: /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, - bool is_texel_buffer = false); + bool is_texel_buffer = false, BufferId buffer_id = {}); - /// Obtains a temporary buffer for usage in texture cache. - [[nodiscard]] std::pair ObtainTempBuffer(VAddr gpu_addr, u32 size); + /// Attempts to obtain a buffer without modifying the cache contents. + [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size); /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); @@ -99,6 +100,8 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); + private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { @@ -119,8 +122,6 @@ private: void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); - [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); - [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); @@ -150,6 +151,7 @@ private: Buffer gds_buffer; std::mutex mutex; Common::SlotVector slot_buffers; + RangeSet gpu_regions; vk::BufferView null_buffer_view; MemoryTracker memory_tracker; PageTable page_table; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 37a44ddac..b65ec4481 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -3,7 +3,6 @@ #include -#include "common/alignment.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -113,140 +112,43 @@ ComputePipeline::~ComputePipeline() = default; bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. - boost::container::static_vector buffer_views; - boost::container::static_vector buffer_infos; boost::container::small_vector set_writes; - boost::container::small_vector buffer_barriers; + BufferBarriers buffer_barriers; Shader::PushData push_data{}; Shader::Backend::Bindings binding{}; - image_infos.clear(); - info->PushUd(binding, push_data); + + // Most of the time when a metadata is updated with a shader it gets cleared. It means + // we can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we + // will need its full emulation anyways. For cases of metadata read a warning will be logged. for (const auto& desc : info->buffers) { - bool is_storage = true; if (desc.is_gds_buffer) { - auto* vk_buffer = buffer_cache.GetGdsBuffer(); - buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes()); - } else { - const auto vsharp = desc.GetSharp(*info); - is_storage = desc.IsStorage(vsharp); - const VAddr address = vsharp.base_address; - // Most of the time when a metadata is updated with a shader it gets cleared. It means - // we can skip the whole dispatch and update the tracked state instead. Also, it is not - // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we - // will need its full emulation anyways. For cases of metadata read a warning will be - // logged. - if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { - LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return false; - } - } else { - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); - } - } - const u32 size = vsharp.GetSize(); - const u32 alignment = - is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, desc.is_written); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding.buffer, adjust); - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); + continue; + } + const VAddr address = desc.GetSharp(*info).base_address; + if (desc.is_written) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return false; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); - ++binding.buffer; } - const auto null_buffer_view = - instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); - for (const auto& desc : info->texture_buffers) { - const auto vsharp = desc.GetSharp(*info); - vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); - const u32 size = vsharp.GetSize(); - if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) { - const VAddr address = vsharp.base_address; - if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { - LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return false; - } - } else { - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); - } - } - const u32 alignment = instance.TexelBufferMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, desc.is_written, true); - const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), - "Texel buffer stride must match format stride"); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding.buffer, adjust / fmt_stride); - buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, - vsharp.GetDataFmt(), vsharp.GetNumberFmt()); - if (auto barrier = - vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite - : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { - buffer_barriers.emplace_back(*barrier); - } - if (desc.is_written) { - texture_cache.InvalidateMemoryFromGPU(address, size); - } - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer - : vk::DescriptorType::eUniformTexelBuffer, - .pTexelBufferView = &buffer_view, - }); - ++binding.buffer; - } + BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, + set_writes, buffer_barriers); BindTextures(texture_cache, *info, binding, set_writes); - for (const auto& sampler : info->samplers) { - const auto ssharp = sampler.GetSharp(*info); - if (ssharp.force_degamma) { - LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); - } - const auto vk_sampler = texture_cache.GetSampler(ssharp); - image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampler, - .pImageInfo = &image_infos.back(), - }); - } - if (set_writes.empty()) { return false; } - const auto cmdbuf = scheduler.CommandBuffer(); - if (!buffer_barriers.empty()) { const auto dependencies = vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, @@ -256,22 +158,22 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, scheduler.EndRendering(); cmdbuf.pipelineBarrier2(dependencies); } - + cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), + &push_data); + // Bind descriptor set. if (uses_push_descriptors) { cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes); - } else { - const auto desc_set = desc_heap.Commit(*desc_layout); - for (auto& set_write : set_writes) { - set_write.dstSet = desc_set; - } - instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, - {}); + return true; } + const auto desc_set = desc_heap.Commit(*desc_layout); + for (auto& set_write : set_writes) { + set_write.dstSet = desc_set; + } + instance.GetDevice().updateDescriptorSets(set_writes, {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, + {}); - cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), - &push_data); return true; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index cbc0fc5ec..48854b418 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/alignment.h" +#include "common/scope_exit.h" #include "common/assert.h" #include "video_core/amdgpu/resource.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -384,15 +384,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. - boost::container::static_vector buffer_views; - boost::container::static_vector buffer_infos; boost::container::small_vector set_writes; - boost::container::small_vector buffer_barriers; + BufferBarriers buffer_barriers; Shader::PushData push_data{}; Shader::Backend::Bindings binding{}; - image_infos.clear(); - for (const auto* stage : stages) { if (!stage) { continue; @@ -402,112 +398,22 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, push_data.step1 = regs.vgt_instance_step_rate_1; } stage->PushUd(binding, push_data); - for (const auto& buffer : stage->buffers) { - const auto vsharp = buffer.GetSharp(*stage); - const bool is_storage = buffer.IsStorage(vsharp); - if (vsharp && vsharp.GetSize() > 0) { - const VAddr address = vsharp.base_address; - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); - } - const u32 size = vsharp.GetSize(); - const u32 alignment = - is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, buffer.is_written); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding.buffer, adjust); - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); - } else if (instance.IsNullDescriptorSupported()) { - buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); - } else { - auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); - buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); - ++binding.buffer; - } - const auto null_buffer_view = - instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); - for (const auto& desc : stage->texture_buffers) { - const auto vsharp = desc.GetSharp(*stage); - vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); - const u32 size = vsharp.GetSize(); - if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) { - const VAddr address = vsharp.base_address; - const u32 alignment = instance.TexelBufferMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, desc.is_written, true); - const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), - "Texel buffer stride must match format stride"); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding.buffer, adjust / fmt_stride); - buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, - vsharp.GetDataFmt(), vsharp.GetNumberFmt()); - const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite - : vk::AccessFlagBits2::eShaderRead; - if (auto barrier = vk_buffer->GetBarrier( - dst_access, vk::PipelineStageFlagBits2::eVertexShader)) { - buffer_barriers.emplace_back(*barrier); - } - if (desc.is_written) { - texture_cache.InvalidateMemoryFromGPU(address, size); - } - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer - : vk::DescriptorType::eUniformTexelBuffer, - .pTexelBufferView = &buffer_view, - }); - ++binding.buffer; - } + BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, + set_writes, buffer_barriers); BindTextures(texture_cache, *stage, binding, set_writes); - - for (const auto& sampler : stage->samplers) { - auto ssharp = sampler.GetSharp(*stage); - if (ssharp.force_degamma) { - LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); - } - if (sampler.disable_aniso) { - const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage); - if (tsharp.base_level == 0 && tsharp.last_level == 0) { - ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); - } - } - const auto vk_sampler = texture_cache.GetSampler(ssharp); - image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampler, - .pImageInfo = &image_infos.back(), - }); - } } const auto cmdbuf = scheduler.CommandBuffer(); + SCOPE_EXIT { + cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle()); + }; + if (set_writes.empty()) { + return; + } if (!buffer_barriers.empty()) { const auto dependencies = vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, @@ -517,23 +423,19 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, scheduler.EndRendering(); cmdbuf.pipelineBarrier2(dependencies); } - - if (!set_writes.empty()) { - if (uses_push_descriptors) { - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, - set_writes); - } else { - const auto desc_set = desc_heap.Commit(*desc_layout); - for (auto& set_write : set_writes) { - set_write.dstSet = desc_set; - } - instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, - desc_set, {}); - } + // Bind descriptor set. + if (uses_push_descriptors) { + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + set_writes); + return; } - cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle()); + const auto desc_set = desc_heap.Commit(*desc_layout); + for (auto& set_write : set_writes) { + set_write.dstSet = desc_set; + } + instance.GetDevice().updateDescriptorSets(set_writes, {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + desc_set, {}); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 61e564150..8af7ec55e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -8,10 +8,13 @@ #include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/buffer_cache/buffer_cache.h" namespace Vulkan { boost::container::static_vector Pipeline::image_infos; +boost::container::static_vector Pipeline::buffer_views; +boost::container::static_vector Pipeline::buffer_infos; Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache) @@ -19,12 +22,136 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH Pipeline::~Pipeline() = default; +void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache, + const Shader::Info& stage, + Shader::Backend::Bindings& binding, Shader::PushData& push_data, + DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const { + using BufferBindingInfo = std::pair; + static boost::container::static_vector buffer_bindings; + + buffer_bindings.clear(); + buffer_infos.clear(); + + for (const auto& desc : stage.buffers) { + const auto vsharp = desc.GetSharp(stage); + if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) { + const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); + buffer_bindings.emplace_back(buffer_id, vsharp); + } else { + buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); + } + } + + using TexBufferBindingInfo = std::pair; + static boost::container::static_vector texbuffer_bindings; + + texbuffer_bindings.clear(); + buffer_views.clear(); + + for (const auto& desc : stage.texture_buffers) { + const auto vsharp = desc.GetSharp(stage); + if (vsharp.base_address != 0 && vsharp.GetSize() > 0 && + vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); + texbuffer_bindings.emplace_back(buffer_id, vsharp); + } else { + texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); + } + } + + // Second pass to re-bind buffers that were updated after binding + for (u32 i = 0; i < buffer_bindings.size(); i++) { + const auto& [buffer_id, vsharp] = buffer_bindings[i]; + const auto& desc = stage.buffers[i]; + const bool is_storage = desc.IsStorage(vsharp); + if (!buffer_id) { + if (desc.is_gds_buffer) { + const auto* gds_buf = buffer_cache.GetGdsBuffer(); + buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes()); + } else if (instance.IsNullDescriptorSupported()) { + buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); + } else { + auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); + buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); + } + } else { + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written, + false, buffer_id); + const u32 alignment = + is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding.buffer, adjust); + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, + .pBufferInfo = &buffer_infos.back(), + }); + ++binding.buffer; + } + + const auto null_buffer_view = + instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); + for (u32 i = 0; i < texbuffer_bindings.size(); i++) { + const auto& [buffer_id, vsharp] = texbuffer_bindings[i]; + const auto& desc = stage.texture_buffers[i]; + vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); + if (buffer_id) { + const u32 alignment = instance.TexelBufferMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written, true, + buffer_id); + const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; + ASSERT_MSG(fmt_stride == vsharp.GetStride(), + "Texel buffer stride must match format stride"); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + ASSERT(adjust % fmt_stride == 0); + push_data.AddOffset(binding.buffer, adjust / fmt_stride); + buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, + vsharp.GetDataFmt(), vsharp.GetNumberFmt()); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eComputeShader)) { + buffer_barriers.emplace_back(*barrier); + } + if (desc.is_written) { + texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize()); + } + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer + : vk::DescriptorType::eUniformTexelBuffer, + .pTexelBufferView = &buffer_view, + }); + ++binding.buffer; + } +} + void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const { using ImageBindingInfo = std::tuple; - boost::container::static_vector image_bindings; + static boost::container::static_vector image_bindings; + + image_bindings.clear(); + image_infos.clear(); for (const auto& image_desc : stage.images) { const auto tsharp = image_desc.GetSharp(stage); @@ -76,6 +203,26 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader .pImageInfo = &image_infos.back(), }); } + + for (const auto& sampler : stage.samplers) { + auto ssharp = sampler.GetSharp(stage); + if (sampler.disable_aniso) { + const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage); + if (tsharp.base_level == 0 && tsharp.last_level == 0) { + ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); + } + } + const auto vk_sampler = texture_cache.GetSampler(ssharp); + image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .pImageInfo = &image_infos.back(), + }); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index ab99e7b33..75764bfa6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -33,6 +33,13 @@ public: } using DescriptorWrites = boost::container::small_vector; + using BufferBarriers = boost::container::small_vector; + + void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache, + const Shader::Info& stage, Shader::Backend::Bindings& binding, + Shader::PushData& push_data, DescriptorWrites& set_writes, + BufferBarriers& buffer_barriers) const; + void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const; @@ -44,6 +51,8 @@ protected: vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; static boost::container::static_vector image_infos; + static boost::container::static_vector buffer_views; + static boost::container::static_vector buffer_infos; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 0eb7e0759..25b62039f 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off // Include the vulkan platform specific header #if defined(ANDROID) #define VK_USE_PLATFORM_ANDROID_KHR diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b3c42fcb6..88e91696a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -98,10 +98,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); buffer_cache.BindVertexBuffers(vs_info); - const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0); + buffer_cache.BindIndexBuffer(is_indexed, 0); - const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); - const auto total_offset = base + offset; + const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); BeginRendering(*pipeline); UpdateDynamicState(*pipeline); @@ -110,9 +109,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si // instance offsets will be automatically applied by Vulkan from indirect args buffer. if (is_indexed) { - cmdbuf.drawIndexedIndirect(buffer->Handle(), total_offset, 1, 0); + cmdbuf.drawIndexedIndirect(buffer->Handle(), base, 1, 0); } else { - cmdbuf.drawIndirect(buffer->Handle(), total_offset, 1, 0); + cmdbuf.drawIndirect(buffer->Handle(), base, 1, 0); } } @@ -161,9 +160,8 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { scheduler.EndRendering(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); - const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); - const auto total_offset = base + offset; - cmdbuf.dispatchIndirect(buffer->Handle(), total_offset); + const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); + cmdbuf.dispatchIndirect(buffer->Handle(), base); } u64 Rasterizer::Flush() { diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index 179dd6646..e47f53abf 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -8,6 +8,9 @@ namespace VideoCore { Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler) { + if (sampler.force_degamma) { + LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); + } using namespace Vulkan; const vk::SamplerCreateInfo sampler_ci = { .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter), diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 4813a3c57..a7734d6a4 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -417,7 +417,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; - const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); + const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size); // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,