diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 85bfeb1a1..3f307c51b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -655,8 +655,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), - true); + rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), + dma_data->data, true); } else if ((dma_data->src_sel == DmaDataSrc::Memory || dma_data->src_sel == DmaDataSrc::MemoryUsingL2) && dma_data->dst_sel == DmaDataDst::Gds) { @@ -665,8 +665,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Data && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { - rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, - sizeof(u32), false); + rasterizer->FillBuffer(dma_data->DstAddress(), dma_data->NumBytes(), + dma_data->data, false); } else if (dma_data->src_sel == DmaDataSrc::Gds && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { @@ -898,7 +898,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); + rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), dma_data->data, + true); } else if ((dma_data->src_sel == DmaDataSrc::Memory || dma_data->src_sel == DmaDataSrc::MemoryUsingL2) && dma_data->dst_sel == DmaDataDst::Gds) { @@ -907,8 +908,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } else if (dma_data->src_sel == DmaDataSrc::Data && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { - rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), - false); + rasterizer->FillBuffer(dma_data->DstAddress(), dma_data->NumBytes(), + dma_data->data, false); } else if (dma_data->src_sel == DmaDataSrc::Gds && (dma_data->dst_sel == DmaDataDst::Memory || dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index ac3fac5b1..7347e99a2 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -261,14 +261,13 @@ void BufferCache::BindIndexBuffer(u32 index_offset) { cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); } -void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { +void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) { ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); if (!is_gds) { - if (!memory->TryWriteBacking(std::bit_cast(address), value, num_bytes)) { - std::memcpy(std::bit_cast(address), value, num_bytes); - return; - } - if (!IsRegionRegistered(address, num_bytes)) { + texture_cache.ClearMeta(address); + if (!IsRegionGpuModified(address, num_bytes)) { + u32* buffer = std::bit_cast(address); + std::fill(buffer, buffer + num_bytes / sizeof(u32), value); return; } } @@ -276,10 +275,10 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo if (is_gds) { return &gds_buffer; } - const BufferId buffer_id = FindBuffer(address, num_bytes); - return &slot_buffers[buffer_id]; + const auto [buffer, offset] = ObtainBuffer(address, num_bytes, true); + return buffer; }(); - InlineDataBuffer(*buffer, address, value, num_bytes); + buffer->Fill(buffer->Offset(address), num_bytes, value); } void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { @@ -778,49 +777,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { }); } -void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, - u32 num_bytes) { - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - const vk::BufferMemoryBarrier2 pre_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer.Handle(), - .offset = buffer.Offset(address), - .size = num_bytes, - }; - const vk::BufferMemoryBarrier2 post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = buffer.Handle(), - .offset = buffer.Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &pre_barrier, - }); - // vkCmdUpdateBuffer can only copy up to 65536 bytes at a time. - static constexpr u32 UpdateBufferMaxSize = 65536; - const auto dst_offset = buffer.Offset(address); - for (u32 offset = 0; offset < num_bytes; offset += UpdateBufferMaxSize) { - const auto* update_src = static_cast(value) + offset; - const auto update_dst = dst_offset + offset; - const auto update_size = std::min(num_bytes - offset, UpdateBufferMaxSize); - cmdbuf.updateBuffer(buffer.Handle(), update_dst, update_size, update_src); - } - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &post_barrier, - }); -} - void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) { vk::BufferCopy copy = { .srcOffset = 0, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6954f979e..73d70704e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,7 +118,7 @@ public: void BindIndexBuffer(u32 index_offset); /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data) - void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds); /// Performs buffer to buffer data copy on the GPU. void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); @@ -193,8 +193,6 @@ private: bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); - void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); - void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); void TouchBuffer(const Buffer& buffer); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8d00ff2d0..214d6d697 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -976,8 +976,8 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { ScopeMarkerEnd(); } -void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { - buffer_cache.InlineData(address, value, num_bytes, is_gds); +void Rasterizer::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) { + buffer_cache.FillBuffer(address, num_bytes, value, is_gds); } void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96a3c95e8..c73626f3f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -55,7 +55,7 @@ public: void ScopedMarkerInsertColor(const std::string_view& str, const u32 color, bool from_guest = false); - void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds); void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size);