From f4966ba4afb8f0369e4edf1588527dff0937f980 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:52:18 +0300 Subject: [PATCH] buffer_cache: Simplify invalidation scheme --- src/video_core/buffer_cache/buffer_cache.cpp | 89 ++++++++----------- .../renderer_vulkan/vk_compute_pipeline.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 - .../texture_cache/texture_cache.cpp | 24 +++-- src/video_core/texture_cache/texture_cache.h | 3 + 5 files changed, 58 insertions(+), 66 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index fa9a8dde5..d0f133ebd 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -552,64 +552,45 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { - boost::container::small_vector image_ids; - const u32 inv_size = std::min(size, MaxInvalidateDist); - texture_cache.ForEachImageInRegion(device_addr, inv_size, [&](ImageId image_id, Image& image) { - // Only consider GPU modified images, i.e render targets or storage images. - // Also avoid any CPU modified images as the image data is likely to be stale. - if (True(image.flags & ImageFlagBits::CpuModified) || - False(image.flags & ImageFlagBits::GpuModified)) { - return; - } - // Image must fully overlap with the provided buffer range. - if (image.cpu_addr < device_addr || image.cpu_addr_end > device_addr + size) { - return; - } - image_ids.push_back(image_id); - }); - if (image_ids.empty()) { + static constexpr FindFlags find_flags = FindFlags::NoCreate | FindFlags::RelaxDim | + FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = device_addr; + info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(info, find_flags); + if (!image_id) { return false; } - // Sort images by modification tick. If there are overlaps we want to - // copy from least to most recently modified. - std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) { - const Image& lhs = texture_cache.GetImage(lhs_id); - const Image& rhs = texture_cache.GetImage(rhs_id); - return lhs.tick_accessed_last < rhs.tick_accessed_last; - }); + Image& image = texture_cache.GetImage(image_id); boost::container::small_vector copies; - for (const ImageId image_id : image_ids) { - copies.clear(); - Image& image = texture_cache.GetImage(image_id); - u32 offset = buffer.Offset(image.cpu_addr); - const u32 num_layers = image.info.resources.layers; - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = std::max(image.info.size.width >> m, 1u); - const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = - image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - copies.push_back({ - .bufferOffset = offset, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), - .imageSubresource{ - .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, depth}, - }); - offset += mip_ofs * num_layers; - } - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); + u32 offset = buffer.Offset(image.cpu_addr); + const u32 num_layers = image.info.resources.layers; + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + copies.push_back({ + .bufferOffset = offset, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + offset += mip_ofs * num_layers; } + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); return true; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index fde4c3ab5..aeae08138 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -133,9 +133,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } } const u32 size = vsharp.GetSize(); - if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); - } const u32 alignment = is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); const auto [vk_buffer, offset] = @@ -196,7 +193,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.MarkWritten(address, size); } } set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6ac4dcf14..18f262bc0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -431,9 +431,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, dst_access, vk::PipelineStageFlagBits2::eVertexShader)) { buffer_barriers.emplace_back(*barrier); } - if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); - } } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 996fcad04..fa5e2dc16 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -51,6 +51,20 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) { }); } +void TextureCache::MarkWritten(VAddr address, size_t max_size) { + static constexpr FindFlags find_flags = FindFlags::NoCreate | FindFlags::RelaxDim | + FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = address; + info.guest_size_bytes = max_size; + const ImageId image_id = FindImage(info, find_flags); + if (!image_id) { + return; + } + // Ensure image is copied when accessed again. + slot_images[image_id].flags |= ImageFlagBits::CpuModified; +} + void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { std::scoped_lock lk{mutex}; @@ -199,10 +213,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { continue; } - ASSERT(cache_image.info.type == info.type); + ASSERT(cache_image.info.type == info.type || True(flags & FindFlags::RelaxFmt)); image_id = cache_id; } + if (True(flags & FindFlags::NoCreate) && !image_id) { + return {}; + } + // Try to resolve overlaps (if any) if (!image_id) { for (const auto& cache_id : image_ids) { @@ -211,10 +229,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { } } - if (True(flags & FindFlags::NoCreate) && !image_id) { - return {}; - } - // Create and register a new image if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44bc2b431..cc19ac4a8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,6 +50,9 @@ public: /// Invalidates any image in the logical page range. void InvalidateMemory(VAddr address, size_t size); + /// Marks an image as dirty if it exists at the provided address. + void MarkWritten(VAddr address, size_t max_size); + /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size);