Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-12-09 21:31:04 +00:00
video_core: Better handling of image copies with DmaData (#3672)
@@ -124,6 +124,42 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
+void Buffer::Fill(u64 offset, u32 num_bytes, u32 value) {
+    scheduler->EndRendering();
+    ASSERT_MSG(offset % 4 == 0 && num_bytes % 4 == 0,
+               "FillBuffer size must be a multiple of 4 bytes");
+    const auto cmdbuf = scheduler->CommandBuffer();
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = num_bytes,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.fillBuffer(buffer, offset, num_bytes, value);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
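The new Buffer::Fill brackets vkCmdFillBuffer between two synchronization2 barriers: the pre-barrier orders any prior read of the range before the transfer write, and the post-barrier makes that write visible to every later stage. Below is a minimal standalone sketch of the same pattern against plain vulkan.hpp; the function name fill_with_barriers is ours, not from the patch, and it assumes VULKAN_HPP_NO_CONSTRUCTORS is defined (as the designated initializers in this patch imply for shadPS4's own build) plus the synchronization2 feature (core in Vulkan 1.3):

    #define VULKAN_HPP_NO_CONSTRUCTORS // makes vk:: structs aggregates, enabling designated init
    #include <vulkan/vulkan.hpp>

    // Sketch only: the fill-between-barriers pattern used by Buffer::Fill,
    // for a command buffer that is already in the recording state.
    void fill_with_barriers(vk::CommandBuffer cmdbuf, vk::Buffer buffer,
                            vk::DeviceSize offset, vk::DeviceSize size, uint32_t value) {
        // Order all prior reads of the range before the transfer write.
        const vk::BufferMemoryBarrier2 pre_barrier{
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .buffer = buffer,
            .offset = offset,
            .size = size,
        };
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &pre_barrier,
        });
        // vkCmdFillBuffer writes `value` as repeated 32-bit words, which is
        // why offset and size must be dword aligned.
        cmdbuf.fillBuffer(buffer, offset, size, value);
        // Make the transfer write visible to any later read or write.
        const vk::BufferMemoryBarrier2 post_barrier{
            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
            .buffer = buffer,
            .offset = offset,
            .size = size,
        };
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &post_barrier,
        });
    }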
@@ -83,29 +83,24 @@ public:
     Buffer& operator=(Buffer&&) = default;
     Buffer(Buffer&&) = default;
 
-    /// Increases the likeliness of this being a stream buffer
     void IncreaseStreamScore(int score) noexcept {
         stream_score += score;
     }
 
-    /// Returns the likeliness of this being a stream buffer
     [[nodiscard]] int StreamScore() const noexcept {
         return stream_score;
     }
 
-    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
     }
 
-    /// Returns the base CPU address of the buffer
     [[nodiscard]] VAddr CpuAddr() const noexcept {
         return cpu_addr;
     }
 
-    /// Returns the offset relative to the given CPU address
-    [[nodiscard]] u64 Offset(VAddr other_cpu_addr) const noexcept {
-        return other_cpu_addr - cpu_addr;
+    [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
+        return static_cast<u32>(other_cpu_addr - cpu_addr);
     }
 
     size_t SizeBytes() const {
@@ -129,16 +124,16 @@ public:
         return buffer.bda_addr;
     }
 
-    std::optional<vk::BufferMemoryBarrier2> GetBarrier(
-        vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
-        u32 offset = 0) {
+    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlags2 dst_acess_mask,
+                                                       vk::PipelineStageFlagBits2 dst_stage,
+                                                       u32 offset = 0) {
         if (dst_acess_mask == access_mask && stage == dst_stage) {
             return {};
         }
 
        DEBUG_ASSERT(offset < size_bytes);
 
-        auto barrier = vk::BufferMemoryBarrier2{
+        const auto barrier = vk::BufferMemoryBarrier2{
            .srcStageMask = stage,
            .srcAccessMask = access_mask,
            .dstStageMask = dst_stage,
@@ -152,6 +147,8 @@ public:
         return barrier;
     }
 
+    void Fill(u64 offset, u32 num_bytes, u32 value);
+
 public:
     VAddr cpu_addr = 0;
     bool is_picked{};
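GetBarrier returns std::nullopt when the buffer is already in the requested stage and access state, so call sites only record a barrier when something actually changes. A hypothetical call site, under the same vulkan.hpp assumptions as the sketch above (buffer and cmdbuf are illustrative names, not from the patch):

    // Hypothetical: transition the buffer for a transfer read, emitting the
    // barrier only if GetBarrier reports a state change.
    if (const auto barrier = buffer.GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                               vk::PipelineStageFlagBits2::eTransfer)) {
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &barrier.value(),
        });
    }

Note also that Offset now returns u32 with an explicit static_cast, which presumably relies on no single cached buffer exceeding 4 GiB, so an address's offset into its containing buffer always fits in 32 bits.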
@@ -364,53 +364,28 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     InlineDataBuffer(*buffer, address, value, num_bytes);
 }
 
-void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
-    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
-    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
-        memcpy(std::bit_cast<void*>(address), value, num_bytes);
-        return;
-    }
-    Buffer* buffer = [&] {
-        if (is_gds) {
-            return &gds_buffer;
-        }
-        const BufferId buffer_id = FindBuffer(address, num_bytes);
-        return &slot_buffers[buffer_id];
-    }();
-    WriteDataBuffer(*buffer, address, value, num_bytes);
-}
-
 void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
-    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
-        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
-            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
-            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
-            return;
-        }
-        // Without a readback there's nothing we can do with this
-        // Fallback to creating dst buffer on GPU to at least have this data there
-    }
+    texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
     auto& src_buffer = [&] -> const Buffer& {
         if (src_gds) {
             return gds_buffer;
         }
-        // Avoid using ObtainBuffer here as that might give us the stream buffer.
-        const BufferId buffer_id = FindBuffer(src, num_bytes);
+        const auto buffer_id = FindBuffer(src, num_bytes);
         auto& buffer = slot_buffers[buffer_id];
-        if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) {
-            texture_cache.InvalidateMemoryFromGPU(dst, num_bytes);
-        }
+        SynchronizeBuffer(buffer, src, num_bytes, false, true);
         return buffer;
     }();
     auto& dst_buffer = [&] -> const Buffer& {
         if (dst_gds) {
             return gds_buffer;
         }
-        // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified.
-        const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true);
-        return *buffer;
+        const auto buffer_id = FindBuffer(dst, num_bytes);
+        auto& buffer = slot_buffers[buffer_id];
+        SynchronizeBuffer(buffer, dst, num_bytes, true, true);
+        gpu_modified_ranges.Add(dst, num_bytes);
+        return buffer;
     }();
-    vk::BufferCopy region{
+    const vk::BufferCopy region = {
         .srcOffset = src_buffer.Offset(src),
         .dstOffset = dst_buffer.Offset(dst),
         .size = num_bytes,
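The rewritten CopyBuffer now invalidates overlapping textures on the destination range up front, resolves both endpoints through FindBuffer (rather than ObtainBuffer, which per the removed comment might hand back the stream buffer), synchronizes them, explicitly records the destination in gpu_modified_ranges, and then records a single GPU copy. A hedged sketch of the call shape for a DmaData-style data-to-data transfer; the addresses are made up and buffer_cache is an assumed name for a BufferCache instance:

    // Illustrative only: copy 4 KiB between two guest mappings. Inside
    // CopyBuffer this becomes one vkCmdCopyBuffer between the cached buffers,
    // with offsets computed by Buffer::Offset relative to each buffer's base.
    const VAddr src = 0x10000000; // hypothetical guest addresses
    const VAddr dst = 0x20000000;
    buffer_cache.CopyBuffer(dst, src, 0x1000, /*dst_gds=*/false, /*src_gds=*/false);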
@@ -680,8 +655,6 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
-    scheduler.EndRendering();
-    cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -851,8 +824,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
     } else {
         total_used_memory -= Common::AlignUp(size, CACHING_PAGESIZE);
         lru_cache.Free(buffer.LRUId());
-        FillBuffer(bda_pagetable_buffer, page_begin * sizeof(vk::DeviceAddress),
-                   size_pages * sizeof(vk::DeviceAddress), 0);
+        const u64 offset = bda_pagetable_buffer.Offset(page_begin * sizeof(vk::DeviceAddress));
+        bda_pagetable_buffer.Fill(offset, size_pages * sizeof(vk::DeviceAddress), 0);
         buffer_ranges.Subtract(buffer.CpuAddr(), buffer.SizeBytes());
     }
 }
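Here the cache-side FillBuffer helper gives way to the new Buffer::Fill: the BDA page table holds one vk::DeviceAddress per caching page, so unregistering a buffer zeroes the page-table slice covering its pages. A sketch of the arithmetic under stated assumptions: page_begin and size_pages are taken to be the buffer's first caching page and page count, and CACHING_PAGEBITS (the log2 of CACHING_PAGESIZE) is our name, not from the patch:

    // Assumed shape of the surrounding code: derive the page range from the
    // buffer's CPU range, then zero one vk::DeviceAddress entry per page.
    const u64 page_begin = buffer.CpuAddr() >> CACHING_PAGEBITS;
    const u64 page_end =
        (buffer.CpuAddr() + buffer.SizeBytes() + CACHING_PAGESIZE - 1) >> CACHING_PAGEBITS;
    const u64 size_pages = page_end - page_begin;
    const u64 offset = bda_pagetable_buffer.Offset(page_begin * sizeof(vk::DeviceAddress));
    bda_pagetable_buffer.Fill(offset, size_pages * sizeof(vk::DeviceAddress), 0);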
@@ -1004,10 +977,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 }
 
 void BufferCache::MemoryBarrier() {
-    // Vulkan doesn't know which buffer we access in a shader if we use
-    // BufferDeviceAddress. We need a full memory barrier.
-    // For now, we only read memory using BDA. If we want to write to it,
-    // we might need to change this.
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     vk::MemoryBarrier2 barrier = {
@@ -1121,41 +1090,6 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     });
 }
 
-void BufferCache::FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value) {
-    scheduler.EndRendering();
-    ASSERT_MSG(num_bytes % 4 == 0, "FillBuffer size must be a multiple of 4 bytes");
-    const auto cmdbuf = scheduler.CommandBuffer();
-    const vk::BufferMemoryBarrier2 pre_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
-        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    const vk::BufferMemoryBarrier2 post_barrier = {
-        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-        .buffer = buffer.Handle(),
-        .offset = buffer.Offset(address),
-        .size = num_bytes,
-    };
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &pre_barrier,
-    });
-    cmdbuf.fillBuffer(buffer.Handle(), buffer.Offset(address), num_bytes, value);
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
-        .pBufferMemoryBarriers = &post_barrier,
-    });
-}
-
 void BufferCache::RunGarbageCollector() {
     SCOPE_EXIT {
         ++gc_tick;
@@ -128,9 +128,6 @@ public:
     /// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
-    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
-    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
-
     /// Performs buffer to buffer data copy on the GPU.
     void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
 
@@ -211,8 +208,6 @@ private:
 
     void WriteDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
 
-    void FillBuffer(Buffer& buffer, VAddr address, u32 num_bytes, u32 value);
-
     void TouchBuffer(const Buffer& buffer);
 
     void DeleteBuffer(BufferId buffer_id);