mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-08 20:58:41 +00:00
video_core: Reimplement inline data as buffer fill (#3825)
This commit is contained in:
@@ -655,8 +655,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
||||||
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32),
|
rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(),
|
||||||
true);
|
dma_data->data, true);
|
||||||
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
|
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
|
||||||
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
|
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
|
||||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||||
@@ -665,8 +665,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||||
(dma_data->dst_sel == DmaDataDst::Memory ||
|
(dma_data->dst_sel == DmaDataDst::Memory ||
|
||||||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
||||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
|
rasterizer->FillBuffer(dma_data->DstAddress<VAddr>(), dma_data->NumBytes(),
|
||||||
sizeof(u32), false);
|
dma_data->data, false);
|
||||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||||
(dma_data->dst_sel == DmaDataDst::Memory ||
|
(dma_data->dst_sel == DmaDataDst::Memory ||
|
||||||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
||||||
@@ -898,7 +898,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
||||||
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
|
rasterizer->FillBuffer(dma_data->dst_addr_lo, dma_data->NumBytes(), dma_data->data,
|
||||||
|
true);
|
||||||
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
|
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
|
||||||
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
|
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
|
||||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||||
@@ -907,8 +908,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
|||||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||||
(dma_data->dst_sel == DmaDataDst::Memory ||
|
(dma_data->dst_sel == DmaDataDst::Memory ||
|
||||||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
||||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
|
rasterizer->FillBuffer(dma_data->DstAddress<VAddr>(), dma_data->NumBytes(),
|
||||||
false);
|
dma_data->data, false);
|
||||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||||
(dma_data->dst_sel == DmaDataDst::Memory ||
|
(dma_data->dst_sel == DmaDataDst::Memory ||
|
||||||
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
|
||||||
|
|||||||
@@ -261,14 +261,13 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
|
|||||||
cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type);
|
cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) {
|
||||||
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
||||||
if (!is_gds) {
|
if (!is_gds) {
|
||||||
if (!memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes)) {
|
texture_cache.ClearMeta(address);
|
||||||
std::memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
if (!IsRegionGpuModified(address, num_bytes)) {
|
||||||
return;
|
u32* buffer = std::bit_cast<u32*>(address);
|
||||||
}
|
std::fill(buffer, buffer + num_bytes / sizeof(u32), value);
|
||||||
if (!IsRegionRegistered(address, num_bytes)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -276,10 +275,10 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
|||||||
if (is_gds) {
|
if (is_gds) {
|
||||||
return &gds_buffer;
|
return &gds_buffer;
|
||||||
}
|
}
|
||||||
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
const auto [buffer, offset] = ObtainBuffer(address, num_bytes, true);
|
||||||
return &slot_buffers[buffer_id];
|
return buffer;
|
||||||
}();
|
}();
|
||||||
InlineDataBuffer(*buffer, address, value, num_bytes);
|
buffer->Fill(buffer->Offset(address), num_bytes, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
||||||
@@ -778,49 +777,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
|
|
||||||
u32 num_bytes) {
|
|
||||||
scheduler.EndRendering();
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
|
||||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
|
||||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
|
||||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
|
||||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
|
||||||
.buffer = buffer.Handle(),
|
|
||||||
.offset = buffer.Offset(address),
|
|
||||||
.size = num_bytes,
|
|
||||||
};
|
|
||||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
|
||||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
|
||||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
|
||||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
|
||||||
.buffer = buffer.Handle(),
|
|
||||||
.offset = buffer.Offset(address),
|
|
||||||
.size = num_bytes,
|
|
||||||
};
|
|
||||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
|
||||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
|
||||||
.bufferMemoryBarrierCount = 1,
|
|
||||||
.pBufferMemoryBarriers = &pre_barrier,
|
|
||||||
});
|
|
||||||
// vkCmdUpdateBuffer can only copy up to 65536 bytes at a time.
|
|
||||||
static constexpr u32 UpdateBufferMaxSize = 65536;
|
|
||||||
const auto dst_offset = buffer.Offset(address);
|
|
||||||
for (u32 offset = 0; offset < num_bytes; offset += UpdateBufferMaxSize) {
|
|
||||||
const auto* update_src = static_cast<const u8*>(value) + offset;
|
|
||||||
const auto update_dst = dst_offset + offset;
|
|
||||||
const auto update_size = std::min(num_bytes - offset, UpdateBufferMaxSize);
|
|
||||||
cmdbuf.updateBuffer(buffer.Handle(), update_dst, update_size, update_src);
|
|
||||||
}
|
|
||||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
|
||||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
|
||||||
.bufferMemoryBarrierCount = 1,
|
|
||||||
.pBufferMemoryBarriers = &post_barrier,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
|
void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) {
|
||||||
vk::BufferCopy copy = {
|
vk::BufferCopy copy = {
|
||||||
.srcOffset = 0,
|
.srcOffset = 0,
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ public:
|
|||||||
void BindIndexBuffer(u32 index_offset);
|
void BindIndexBuffer(u32 index_offset);
|
||||||
|
|
||||||
/// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
|
/// Fills num_bytes at address with a repeated 32-bit value. (targets the GDS buffer when is_gds is set)
|
||||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds);
|
||||||
|
|
||||||
/// Performs buffer to buffer data copy on the GPU.
|
/// Performs buffer to buffer data copy on the GPU.
|
||||||
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
||||||
@@ -193,8 +193,6 @@ private:
|
|||||||
|
|
||||||
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
|
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
|
||||||
|
|
||||||
void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
|
|
||||||
|
|
||||||
void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
|
void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
|
||||||
|
|
||||||
void TouchBuffer(const Buffer& buffer);
|
void TouchBuffer(const Buffer& buffer);
|
||||||
|
|||||||
@@ -976,8 +976,8 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
|
|||||||
ScopeMarkerEnd();
|
ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
void Rasterizer::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds) {
|
||||||
buffer_cache.InlineData(address, value, num_bytes, is_gds);
|
buffer_cache.FillBuffer(address, num_bytes, value, is_gds);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ public:
|
|||||||
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color,
|
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color,
|
||||||
bool from_guest = false);
|
bool from_guest = false);
|
||||||
|
|
||||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
void FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gds);
|
||||||
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
||||||
u32 ReadDataFromGds(u32 gsd_offset);
|
u32 ReadDataFromGds(u32 gsd_offset);
|
||||||
bool InvalidateMemory(VAddr addr, u64 size);
|
bool InvalidateMemory(VAddr addr, u64 size);
|
||||||
|
|||||||
Reference in New Issue
Block a user