video_core: Better handling of image copies with DmaData (#3672)
@@ -124,6 +124,42 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
+void Buffer::Fill(u64 offset, u32 num_bytes, u32 value) {
+    scheduler->EndRendering();
+    ASSERT_MSG(offset % 4 == 0 && num_bytes % 4 == 0,
+               "FillBuffer size must be a multiple of 4 bytes");
+    const auto cmdbuf = scheduler->CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.fillBuffer(buffer, offset, num_bytes, value);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
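Note: the new Buffer::Fill brackets vkCmdFillBuffer with two barriers — the pre-barrier drains prior reads of the range before the transfer overwrites it, and the post-barrier makes the transfer write visible to every later stage, so callers need no synchronization of their own. A hedged call-site sketch (the buffer object and values are illustrative, not taken from the PR):

    u64 offset = 0x40; // dword-aligned, as the ASSERT_MSG requires
    u32 bytes = 256;   // must be a multiple of 4
    buffer.Fill(offset, bytes, 0u); // records the fill plus both barriers internally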
@@ -83,29 +83,24 @@ public:
     Buffer& operator=(Buffer&&) = default;
     Buffer(Buffer&&) = default;
 
     /// Increases the likeliness of this being a stream buffer
     void IncreaseStreamScore(int score) noexcept {
         stream_score += score;
     }
 
     /// Returns the likeliness of this being a stream buffer
     [[nodiscard]] int StreamScore() const noexcept {
         return stream_score;
     }
 
     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
     }
 
     /// Returns the base CPU address of the buffer
     [[nodiscard]] VAddr CpuAddr() const noexcept {
         return cpu_addr;
     }
 
     /// Returns the offset relative to the given CPU address
-    [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
-        return static_cast<u32>(other_cpu_addr - cpu_addr);
+    [[nodiscard]] u64 Offset(VAddr other_cpu_addr) const noexcept {
+        return other_cpu_addr - cpu_addr;
     }
 
     size_t SizeBytes() const {
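The return type widens from u32 to u64 because an offset into a large buffer (such as the BDA page table) can exceed 4 GiB, and the old static_cast silently truncated it. An illustrative failure case (the values are hypothetical):

    constexpr VAddr base = 0x10'0000'0000;       // buffer base address
    constexpr VAddr addr = base + 0x1'2345'6789; // more than 4 GiB into the buffer
    // old: static_cast<u32>(addr - base) == 0x2345'6789 — high bits lost
    // new: addr - base == 0x1'2345'6789 — exact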
@@ -129,16 +124,16 @@ public:
         return buffer.bda_addr;
     }
 
-    std::optional<vk::BufferMemoryBarrier2> GetBarrier(
-        vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
-        u32 offset = 0) {
+    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlags2 dst_acess_mask,
+                                                       vk::PipelineStageFlagBits2 dst_stage,
+                                                       u32 offset = 0) {
         if (dst_acess_mask == access_mask && stage == dst_stage) {
             return {};
         }
 
         DEBUG_ASSERT(offset < size_bytes);
 
-        auto barrier = vk::BufferMemoryBarrier2{
+        const auto barrier = vk::BufferMemoryBarrier2{
             .srcStageMask = stage,
             .srcAccessMask = access_mask,
             .dstStageMask = dst_stage,
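The parameter change is purely a spelling cleanup: in Vulkan-Hpp, vk::AccessFlags2 is defined as an alias for vk::Flags<vk::AccessFlagBits2>, so the overload itself is unchanged. A standalone check (assuming <vulkan/vulkan.hpp> is available):

    #include <type_traits>
    #include <vulkan/vulkan.hpp>

    static_assert(std::is_same_v<vk::AccessFlags2, vk::Flags<vk::AccessFlagBits2>>,
                  "AccessFlags2 is the canonical spelling of Flags<AccessFlagBits2>");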
@@ -152,6 +147,8 @@ public:
         return barrier;
     }
 
+    void Fill(u64 offset, u32 num_bytes, u32 value);
+
 public:
     VAddr cpu_addr = 0;
     bool is_picked{};
@@ -364,53 +364,28 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     InlineDataBuffer(*buffer, address, value, num_bytes);
 }
 
-void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
-    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
-    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
-        memcpy(std::bit_cast<void*>(address), value, num_bytes);
-        return;
-    }
-    Buffer* buffer = [&] {
-        if (is_gds) {
-            return &gds_buffer;
-        }
-        const BufferId buffer_id = FindBuffer(address, num_bytes);
-        return &slot_buffers[buffer_id];
-    }();
-    WriteDataBuffer(*buffer, address, value, num_bytes);
-}
-
 void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
-    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
-        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
-            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
-            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
-            return;
-        }
-        // Without a readback there's nothing we can do with this
-        // Fallback to creating dst buffer on GPU to at least have this data there
-    }
+    texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
     auto& src_buffer = [&] -> const Buffer& {
         if (src_gds) {
             return gds_buffer;
         }
         // Avoid using ObtainBuffer here as that might give us the stream buffer.
-        const BufferId buffer_id = FindBuffer(src, num_bytes);
+        const auto buffer_id = FindBuffer(src, num_bytes);
         auto& buffer = slot_buffers[buffer_id];
-        if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) {
-            texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
-        }
+        SynchronizeBuffer(buffer, src, num_bytes, false, true);
         return buffer;
     }();
     auto& dst_buffer = [&] -> const Buffer& {
         if (dst_gds) {
             return gds_buffer;
         }
-        // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified.
-        const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true);
-        return *buffer;
+        const auto buffer_id = FindBuffer(dst, num_bytes);
+        auto& buffer = slot_buffers[buffer_id];
+        SynchronizeBuffer(buffer, dst, num_bytes, true, true);
+        gpu_modified_ranges.Add(dst, num_bytes);
+        return buffer;
     }();
-    vk::BufferCopy region{
+    const vk::BufferCopy region = {
        .srcOffset = src_buffer.Offset(src),
        .dstOffset = dst_buffer.Offset(dst),
        .size = num_bytes,
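The hunk is cut off after the region initializer. For orientation, a hedged sketch of how such a region is typically consumed — the actual barrier and copy recording in the PR is not shown above and may differ:

    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    // src/dst buffer barriers elided; Handle() is the accessor the PR itself uses elsewhere.
    cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);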
@@ -680,8 +655,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
-    const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
+    new_buffer.Fill(0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -851,8 +824,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
     } else {
         total_used_memory -= Common::AlignUp(size, CACHING_PAGESIZE);
         lru_cache.Free(buffer.LRUId());
-        FillBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress),
-                   size_pages * sizeof(vk::DeviceAddress), 0);
+        const u64 offset = bda_pagetable_buffer.Offset(page_begin * sizeof(vk::DeviceAddress));
+        bda_pagetable_buffer.Fill(offset, size_pages * sizeof(vk::DeviceAddress), 0);
         buffer_ranges.Subtract(buffer.CpuAddr(), buffer.SizeBytes());
     }
 }
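Unregistering a buffer now zeroes its BDA page-table entries through Buffer::Fill instead of the removed BufferCache::FillBuffer. The byte arithmetic, with hypothetical values:

    constexpr u64 page_begin = 0x1000;               // first cached page of the buffer
    constexpr u64 size_pages = 4;                    // pages the buffer spans
    constexpr u64 entry = sizeof(vk::DeviceAddress); // 8 bytes per page-table slot
    static_assert(page_begin * entry == 0x8000);     // byte position of the first entry
    static_assert(size_pages * entry == 32);         // bytes zero-filled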
@@ -1004,10 +977,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 }
 
 void BufferCache::MemoryBarrier() {
-    // Vulkan doesn't know which buffer we access in a shader if we use
-    // BufferDeviceAddress. We need a full memory barrier.
-    // For now, we only read memory using BDA. If we want to write to it,
-    // we might need to change this.
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     vk::MemoryBarrier2 barrier = {
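The dropped comment claimed BDA memory is only ever read, a caveat that no longer holds (or was simply stale) now that page-table contents are rewritten on the fly. The hunk is truncated after the struct opens; for reference, a full pipeline barrier of the kind MemoryBarrier() records would look like the following — the exact masks are assumptions, not quoted from the PR:

    const vk::MemoryBarrier2 barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .memoryBarrierCount = 1,
        .pMemoryBarriers = &barrier,
    });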
@@ -1121,41 +1090,6 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     });
 }
 
-void BufferCache::FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value) {
-    scheduler.EndRendering();
-    ASSERT_MSG(num_bytes % 4 == 0, "FillBuffer size must be a multiple of 4 bytes");
-    const auto cmdbuf = scheduler.CommandBuffer();
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.fillBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
-}
-
 void BufferCache::RunGarbageCollector() {
     SCOPE_EXIT {
         ++gc_tick;
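With the cache-level FillBuffer gone, call sites go through Buffer::Fill and compute the byte offset explicitly via the now-u64 Buffer::Offset. A hedged sketch of the migrated pattern (buffer, address, num_bytes, and value are illustrative):

    const u64 offset = buffer.Offset(address); // no u32 truncation anymore
    buffer.Fill(offset, num_bytes, value);     // barriers handled inside Fill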
@@ -128,9 +128,6 @@ public:
     /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
-    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
-    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
-
     /// Performs buffer to buffer data copy on the GPU.
     void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
@@ -211,8 +208,6 @@ private:
 
     void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
 
-    void FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value);
-
     void TouchBuffer(const Buffer& buffer);
 
     void DeleteBuffer(BufferId buffer_id);