diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
index e85a6eb18..5ff80facd 100644
--- a/src/video_core/buffer_cache/buffer.cpp
+++ b/src/video_core/buffer_cache/buffer.cpp
@@ -124,6 +124,46 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
+/// Records a GPU-side fill of [offset, offset + num_bytes) with the 32-bit pattern `value`.
+/// Calls scheduler->EndRendering() first and brackets the fill with buffer memory barriers.
+/// Both `offset` and `num_bytes` must be multiples of 4.
+void Buffer::Fill(u64 offset, u32 num_bytes, u32 value) {
+    scheduler->EndRendering();
+    ASSERT_MSG(offset % 4 == 0 && num_bytes % 4 == 0,
+               "FillBuffer offset and size must be multiples of 4 bytes");
+    const auto cmdbuf = scheduler->CommandBuffer();
+    // Source access covers prior writes too, so the fill cannot race with them (WAW hazard).
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.fillBuffer(buffer, offset, num_bytes, value);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h
index b02f8c181..1f661ba13 100644
--- a/src/video_core/buffer_cache/buffer.h
+++ b/src/video_core/buffer_cache/buffer.h
@@ -83,29 +83,24 @@ public:
     Buffer& operator=(Buffer&&) = default;
     Buffer(Buffer&&) = default;
 
-    /// Increases the likeliness of this being a stream buffer
     void IncreaseStreamScore(int score) noexcept {
         stream_score += score;
     }
 
-    /// Returns the likeliness of this being a stream buffer
     [[nodiscard]] int StreamScore() const noexcept {
         return stream_score;
     }
 
-    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
     }
 
-    /// Returns the base CPU address of the buffer
     [[nodiscard]] VAddr CpuAddr() const noexcept {
         return cpu_addr;
     }
 
-    /// Returns the offset relative to the given CPU address
-    [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
-        return static_cast<u32>(other_cpu_addr - cpu_addr);
+    [[nodiscard]] u64 Offset(VAddr other_cpu_addr) const noexcept {
+        return other_cpu_addr - cpu_addr;
     }
 
     size_t SizeBytes() const {
@@ -129,16 +124,16 @@ public:
         return buffer.bda_addr;
     }
 
-    std::optional<vk::BufferMemoryBarrier2> GetBarrier(
-        vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
-        u32 offset = 0) {
+    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlags2 dst_acess_mask,
+                                                       vk::PipelineStageFlagBits2 dst_stage,
+                                                       u32 offset = 0) {
         if (dst_acess_mask == access_mask && stage == dst_stage) {
             return {};
         }
 
         DEBUG_ASSERT(offset < size_bytes);
 
-        auto barrier = vk::BufferMemoryBarrier2{
+        const auto barrier = vk::BufferMemoryBarrier2{
             .srcStageMask = stage,
             .srcAccessMask = access_mask,
             .dstStageMask = dst_stage,
@@ -152,6 +147,9 @@ public:
         return barrier;
     }
 
+    /// Records a GPU fill of [offset, offset + num_bytes) with a 32-bit value (dword aligned).
+    void Fill(u64 offset, u32 num_bytes, u32 value);
+
 public:
     VAddr cpu_addr = 0;
     bool is_picked{};
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 6fabd5d10..bc19c97e1 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -364,53 +364,28 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     InlineDataBuffer(*buffer, address, value, num_bytes);
 }
 
-void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
-    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
-    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
-        memcpy(std::bit_cast<void*>(address), value, num_bytes);
-        return;
-    }
-    Buffer* buffer = [&] {
-        if (is_gds) {
-            return &gds_buffer;
-        }
-        const BufferId buffer_id = FindBuffer(address, num_bytes);
-        return &slot_buffers[buffer_id];
-    }();
-    WriteDataBuffer(*buffer, address, value, num_bytes);
-}
-
 void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
-    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
-        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
-            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
-            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
-            return;
-        }
-        // Without a readback there's nothing we can do with this
-        // Fallback to creating dst buffer on GPU to at least have this data there
-    }
+    texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
     auto& src_buffer = [&] -> const Buffer& {
         if (src_gds) {
             return gds_buffer;
         }
-        // Avoid using ObtainBuffer here as that might give us the stream buffer.
-        const BufferId buffer_id = FindBuffer(src, num_bytes);
+        const auto buffer_id = FindBuffer(src, num_bytes);
         auto& buffer = slot_buffers[buffer_id];
-        if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) {
-            texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
-        }
+        SynchronizeBuffer(buffer, src, num_bytes, false, true);
         return buffer;
     }();
     auto& dst_buffer = [&] -> const Buffer& {
         if (dst_gds) {
             return gds_buffer;
         }
-        // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified.
-        const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true);
-        return *buffer;
+        const auto buffer_id = FindBuffer(dst, num_bytes);
+        auto& buffer = slot_buffers[buffer_id];
+        SynchronizeBuffer(buffer, dst, num_bytes, true, true);
+        gpu_modified_ranges.Add(dst, num_bytes);
+        return buffer;
     }();
-    vk::BufferCopy region{
+    const vk::BufferCopy region = {
         .srcOffset = src_buffer.Offset(src),
         .dstOffset = dst_buffer.Offset(dst),
         .size = num_bytes,
@@ -680,8 +655,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -851,8 +824,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
         } else {
             total_used_memory -= Common::AlignUp(size, CACHING_PAGESIZE);
             lru_cache.Free(buffer.LRUId());
-            FillBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress),
-                       size_pages * sizeof(vk::DeviceAddress), 0);
+            const u64 offset = bda_pagetable_buffer.Offset(page_begin * sizeof(vk::DeviceAddress));
+            bda_pagetable_buffer.Fill(offset, size_pages * sizeof(vk::DeviceAddress), 0);
             buffer_ranges.Subtract(buffer.CpuAddr(), buffer.SizeBytes());
         }
     }
@@ -1004,10 +977,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 }
 
 void BufferCache::MemoryBarrier() {
-    // Vulkan doesn't know which buffer we access in a shader if we use
-    // BufferDeviceAddress. We need a full memory barrier.
-    // For now, we only read memory using BDA. If we want to write to it,
-    // we might need to change this.
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     vk::MemoryBarrier2 barrier = {
@@ -1121,41 +1090,6 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     });
 }
 
-void BufferCache::FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value) {
-    scheduler.EndRendering();
-    ASSERT_MSG(num_bytes % 4 == 0, "FillBuffer size must be a multiple of 4 bytes");
-    const auto cmdbuf = scheduler.CommandBuffer();
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.fillBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
-}
-
 void BufferCache::RunGarbageCollector() {
     SCOPE_EXIT {
         ++gc_tick;
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index aecc97db0..ccf77b4f5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -128,9 +128,6 @@ public:
     /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
-    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
-    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
-
     /// Performs buffer to buffer data copy on the GPU.
     void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
 
@@ -211,8 +208,6 @@ private:
 
     void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
 
-    void FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value);
-
     void TouchBuffer(const Buffer& buffer);
 
     void DeleteBuffer(BufferId buffer_id);