video_core: Reimplement inline data as buffer fill (#3825)

This commit is contained in:
TheTurtle
2025-11-24 23:25:22 +02:00
committed by GitHub
parent 2577dfde7e
commit 14d71a155a
5 changed files with 20 additions and 65 deletions

View File

@@ -655,8 +655,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
break;
}
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32),
true);
rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(),
dma_data->data, true);
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
dma_data->dst_sel == DmaDataDst::Gds) {
@@ -665,8 +665,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
} else if (dma_data->src_sel == DmaDataSrc::Data &&
(dma_data->dst_sel == DmaDataDst::Memory ||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
sizeof(u32), false);
rasterizer->FillBuffer(dma_data->DstAddress<VAddr>(), dma_data->NumBytes(),
dma_data->data, false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
(dma_data->dst_sel == DmaDataDst::Memory ||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
@@ -898,7 +898,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
break;
}
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), dma_data->data,
true);
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
dma_data->dst_sel == DmaDataDst::Gds) {
@@ -907,8 +908,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
} else if (dma_data->src_sel == DmaDataSrc::Data &&
(dma_data->dst_sel == DmaDataDst::Memory ||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
false);
rasterizer->FillBuffer(dma_data->DstAddress<VAddr>(), dma_data->NumBytes(),
dma_data->data, false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
(dma_data->dst_sel == DmaDataDst::Memory ||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {

View File

@@ -261,14 +261,13 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type);
}
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) {
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
if (!is_gds) {
if (!memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes)) {
std::memcpy(std::bit_cast<void*>(address), value, num_bytes);
return;
}
if (!IsRegionRegistered(address, num_bytes)) {
texture_cache.ClearMeta(address);
if (!IsRegionGpuModified(address, num_bytes)) {
u32* buffer = std::bit_cast<u32*>(address);
std::fill(buffer, buffer + num_bytes / sizeof(u32), value);
return;
}
}
@@ -276,10 +275,10 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
if (is_gds) {
return &gds_buffer;
}
const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id];
const auto [buffer, offset] = ObtainBuffer(address, num_bytes, true);
return buffer;
}();
InlineDataBuffer(*buffer, address, value, num_bytes);
buffer->Fill(buffer->Offset(address), num_bytes, value);
}
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
@@ -778,49 +777,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
});
}
/// Records commands on the scheduler's current command buffer that copy `num_bytes` bytes
/// from host memory at `value` into `buffer`, starting at `buffer.Offset(address)`.
/// The write is wrapped in a pair of buffer memory barriers covering exactly the written range.
/// @param buffer    Destination buffer.
/// @param address   Address that `buffer.Offset()` translates into an offset inside `buffer`.
/// @param value     Pointer to the source bytes; read while the commands are recorded.
/// @param num_bytes Number of bytes to write.
void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
u32 num_bytes) {
// vkCmdUpdateBuffer must be recorded outside of a render pass instance.
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
// Barrier issued before the write: prior reads from any stage -> transfer write.
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
// Barrier issued after the write: transfer write -> subsequent reads from any stage.
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer.Handle(),
.offset = buffer.Offset(address),
.size = num_bytes,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
// vkCmdUpdateBuffer can only copy up to 65536 bytes at a time.
static constexpr u32 UpdateBufferMaxSize = 65536;
const auto dst_offset = buffer.Offset(address);
// Split the write into chunks of at most UpdateBufferMaxSize bytes; the last chunk may be
// shorter.
// NOTE(review): vkCmdUpdateBuffer also requires 4-byte-aligned offset and size; nothing in
// this function checks that -- presumably callers guarantee it, confirm.
for (u32 offset = 0; offset < num_bytes; offset += UpdateBufferMaxSize) {
const auto* update_src = static_cast<const u8*>(value) + offset;
const auto update_dst = dst_offset + offset;
const auto update_size = std::min(num_bytes - offset, UpdateBufferMaxSize);
cmdbuf.updateBuffer(buffer.Handle(), update_dst, update_size, update_src);
}
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
vk::BufferCopy copy = {
.srcOffset = 0,

View File

@@ -118,7 +118,7 @@ public:
void BindIndexBuffer(u32 index_offset);
/// Fills num_bytes of a GPU buffer (or of GDS when is_gds is set) with a repeated 32-bit value.
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds);
/// Performs buffer to buffer data copy on the GPU.
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
@@ -193,8 +193,6 @@ private:
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
void TouchBuffer(const Buffer& buffer);

View File

@@ -976,8 +976,8 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
ScopeMarkerEnd();
}
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
buffer_cache.InlineData(address, value, num_bytes, is_gds);
void Rasterizer::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) {
buffer_cache.FillBuffer(address, num_bytes, value, is_gds);
}
void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {

View File

@@ -55,7 +55,7 @@ public:
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color,
bool from_guest = false);
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds);
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
u32 ReadDataFromGds(u32 gsd_offset);
bool InvalidateMemory(VAddr addr, u64 size);