video_core: Improve some data validity
commit 2ca34c4d94
parent 929853321b
@@ -15,6 +15,7 @@
 #include "video_core/amdgpu/pm4_cmds.h"
 #include "video_core/renderdoc.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"

 namespace AmdGpu {

@@ -619,6 +620,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         }
         case PM4ItOpcode::EventWriteEop: {
             const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
+            if (rasterizer) {
+                rasterizer->CommitAsyncFlushes();
+            }
+            ++fence_tick;
             event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
                 auto* memory = Core::Memory::Instance();
                 if (!memory->TryWriteBacking(address, &data, num_bytes)) {

@@ -1016,6 +1021,10 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
         }
         case PM4ItOpcode::ReleaseMem: {
             const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
+            ++fence_tick;
+            if (rasterizer) {
+                rasterizer->CommitAsyncFlushes();
+            }
             release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
             break;
         }

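The two handlers above (EventWriteEop on the graphics queue, ReleaseMem on the async compute queues) now commit pending readbacks through the rasterizer before the fence value becomes visible, and bump a CPU-side fence tick. A minimal standalone sketch of that ordering follows; it is not shadPS4 code, and FakeRasterizer, FakeCommandProcessor and OnEventWriteEop are illustrative stand-ins.

#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <queue>

// Standalone sketch (not shadPS4 code): commit pending readbacks before signalling
// an end-of-pipe fence, mirroring the ordering added to the handlers above.
struct FakeRasterizer {
    std::queue<std::function<void()>> pending_readbacks;
    bool CommitAsyncFlushes() {
        // Flush everything the GPU produced so the CPU sees it before the fence.
        const bool did_work = !pending_readbacks.empty();
        while (!pending_readbacks.empty()) {
            pending_readbacks.front()();
            pending_readbacks.pop();
        }
        return did_work;
    }
};

struct FakeCommandProcessor {
    FakeRasterizer* rasterizer{};
    uint64_t fence_tick{0};

    void OnEventWriteEop(uint64_t* fence_addr, uint64_t fence_data) {
        if (rasterizer) {
            rasterizer->CommitAsyncFlushes(); // readbacks land before the fence value
        }
        ++fence_tick;
        std::memcpy(fence_addr, &fence_data, sizeof(fence_data)); // signal the fence
    }
};

int main() {
    FakeRasterizer rast;
    FakeCommandProcessor cp{&rast};
    uint64_t fence = 0;
    rast.pending_readbacks.push([] { std::cout << "readback committed\n"; });
    cp.OnEventWriteEop(&fence, 42);
    std::cout << "fence=" << fence << " tick=" << cp.fence_tick << "\n";
}
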
@@ -1532,6 +1532,10 @@ public:
         return mapped_queues[curr_qid].cs_state;
     }

+    inline u64 GetFenceTick() const {
+        return fence_tick;
+    }
+
     struct AscQueueInfo {
         static constexpr size_t Pm4BufferSize = 1024;
         VAddr map_addr;

@@ -1627,6 +1631,7 @@ private:
     std::condition_variable_any submit_cv;
     std::queue<Common::UniqueFunction<void>> command_queue{};
     int curr_qid{-1};
+    u64 fence_tick{0};
 };

 static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

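The new fence_tick counter stays private and is exposed only through the const GetFenceTick() accessor added above. A tiny sketch of that accessor pattern, with illustrative names rather than the emulator's types:

#include <cstdint>
#include <iostream>

// Standalone sketch (not shadPS4 code): a private, monotonically increasing fence
// counter behind a const accessor; consumers snapshot the tick and compare later.
class CommandProcessor {
public:
    uint64_t GetFenceTick() const {
        return fence_tick;
    }
    void ProcessFenceEvent() {
        ++fence_tick; // bumped once per EOP/ReleaseMem fence, as in the diff above
    }

private:
    uint64_t fence_tick{0};
};

int main() {
    CommandProcessor cp;
    const uint64_t before = cp.GetFenceTick();
    cp.ProcessFenceEvent();
    std::cout << "fences signalled since snapshot: " << (cp.GetFenceTick() - before) << "\n";
}
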
@@ -1,7 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-#pragma clang optimize off
 #include <algorithm>
+#include <new>
 #include "common/alignment.h"
 #include "common/debug.h"
 #include "common/div_ceil.h"

@@ -26,6 +27,7 @@ static constexpr size_t UboStreamBufferSize = 128_MB;
 static constexpr size_t DownloadBufferSize = 128_MB;
 static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
+static constexpr size_t DownloadSizeThreshold = 2_MB;

 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                          AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, PageManager& tracker_)

@@ -142,7 +144,7 @@ void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
     DownloadBufferMemory(buffer, device_addr, size);
 }

-void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
+void BufferCache::DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size) {
     boost::container::small_vector<vk::BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
     memory_tracker.ForEachDownloadRange<true>(

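The only change to DownloadBufferMemory here is taking the source buffer by const reference, documenting that the readback path never mutates the cached buffer. A small sketch of the same idea, with hypothetical names:

#include <cstdint>
#include <cstring>
#include <vector>

// Standalone sketch (not shadPS4 code): a const source on the download path lets
// the compiler reject accidental writes to the cached buffer.
struct CachedBuffer {
    std::vector<uint8_t> data;
    const uint8_t* Mapped() const { return data.data(); }
};

// Only reads from `buffer`; a stray write would no longer compile.
void DownloadRegion(const CachedBuffer& buffer, size_t offset, size_t size, uint8_t* out) {
    std::memcpy(out, buffer.Mapped() + offset, size);
}

int main() {
    CachedBuffer buf{{1, 2, 3, 4}};
    uint8_t out[2];
    DownloadRegion(buf, 1, 2, out);
    return (out[0] == 2 && out[1] == 3) ? 0 : 1;
}
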
@@ -185,6 +187,91 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     }
 }

+bool BufferCache::CommitAsyncFlushes() {
+    if (pending_download_ranges.Empty()) {
+        return false;
+    }
+    using BufferCopies = boost::container::small_vector<vk::BufferCopy, 8>;
+    boost::container::small_vector<BufferCopies, 8> copies;
+    boost::container::small_vector<BufferId, 8> buffer_ids;
+    u64 total_size_bytes = 0;
+    pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) {
+        const std::size_t size = interval_upper - interval_lower;
+        const VAddr device_addr = interval_lower;
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            const VAddr buffer_start = buffer.CpuAddr();
+            const VAddr buffer_end = buffer_start + buffer.SizeBytes();
+            const VAddr new_start = std::max(buffer_start, device_addr);
+            const VAddr new_end = std::min(buffer_end, device_addr + size);
+            auto& buffer_copies = copies.emplace_back();
+            buffer_ids.emplace_back(buffer_id);
+            memory_tracker.ForEachDownloadRange<false>(new_start, new_end - new_start,
+                                                       [&](u64 device_addr_out, u64 range_size) {
+                const VAddr buffer_addr = buffer.CpuAddr();
+                const auto add_download = [&](VAddr start, VAddr end) {
+                    const u64 new_offset = start - buffer_addr;
+                    const u64 new_size = end - start;
+                    buffer_copies.emplace_back(new_offset, total_size_bytes, new_size);
+                    // Align up to avoid cache conflicts
+                    constexpr u64 align = std::hardware_destructive_interference_size;
+                    constexpr u64 mask = ~(align - 1ULL);
+                    total_size_bytes += (new_size + align - 1) & mask;
+                };
+                gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
+                                                   add_download);
+            });
+        });
+    });
+    pending_download_ranges.Clear();
+    if (copies.empty()) {
+        return false;
+    }
+    const auto [download, offset] = download_buffer.Map(total_size_bytes);
+    download_buffer.Commit();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    static constexpr vk::MemoryBarrier2 read_barrier = {
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .memoryBarrierCount = 1u,
+        .pMemoryBarriers = &read_barrier,
+    });
+    for (s32 i = 0; i < buffer_ids.size(); ++i) {
+        auto& buffer_copies = copies[i];
+        if (buffer_copies.empty()) {
+            continue;
+        }
+        for (auto& copy : buffer_copies) {
+            copy.dstOffset += offset;
+        }
+        const BufferId buffer_id = buffer_ids[i];
+        Buffer& buffer = slot_buffers[buffer_id];
+        cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies);
+    }
+    scheduler.DeferOperation([this, download, offset, buffer_ids, copies]() {
+        auto* memory = Core::Memory::Instance();
+        for (s32 i = 0; i < buffer_ids.size(); ++i) {
+            auto& buffer_copies = copies[i];
+            if (buffer_copies.empty()) {
+                continue;
+            }
+            const BufferId buffer_id = buffer_ids[i];
+            Buffer& buffer = slot_buffers[buffer_id];
+            for (auto& copy : buffer_copies) {
+                const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
+                const u64 dst_offset = copy.dstOffset - offset;
+                ASSERT(memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr),
+                                               download + dst_offset, copy.size));
+            }
+        }
+    });
+    scheduler.Flush();
+    return true;
+}
+
 void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
     Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
     Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;

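Inside the new CommitAsyncFlushes above, each copied range is rounded up to std::hardware_destructive_interference_size when packing the staging allocation, which is presumably why buffer_cache.cpp now includes <new>. A standalone sketch of that align-up (assumes a standard library that provides the C++17 interference-size constants):

#include <cstdint>
#include <iostream>
#include <new> // std::hardware_destructive_interference_size

// Standalone sketch (not shadPS4 code): pack several download regions into one
// staging allocation while rounding each slot up to the cache-line size, like the
// accumulation inside CommitAsyncFlushes above. Assumes the alignment is a power
// of two, which the standard constant is in practice.
constexpr uint64_t AlignUpToCacheLine(uint64_t size) {
    constexpr uint64_t align = std::hardware_destructive_interference_size;
    constexpr uint64_t mask = ~(align - 1ULL);
    return (size + align - 1) & mask;
}

int main() {
    const uint64_t sizes[] = {1, 64, 100, 4096};
    uint64_t total = 0;
    for (uint64_t size : sizes) {
        std::cout << "region of " << size << " bytes starts at offset " << total << "\n";
        total += AlignUpToCacheLine(size); // neighbouring regions never share a cache line
    }
    std::cout << "staging allocation: " << total << " bytes\n";
}
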
@@ -302,9 +389,11 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
 void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
     ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
     if (!is_gds) {
-        if (!IsRegionGpuModified(address, num_bytes)) {
+        if (!memory_tracker.IsRegionGpuModified(address, num_bytes)) {
             memcpy(std::bit_cast<void*>(address), value, num_bytes);
+            return;
         } else {
+            // Write to backing memory to bypass memory protection.
             ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
         }
     }

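InlineData now consults the memory tracker directly: if the target range was never handed to the GPU a plain memcpy suffices, otherwise the write goes through the backing mapping so it does not fault on write-protected pages. A standalone sketch of that decision; FakeTracker and FakeMemory are stand-ins, not emulator types:

#include <cstdint>
#include <cstring>
#include <iostream>

// Standalone sketch (not shadPS4 code) of the branch InlineData takes above.
struct FakeTracker {
    bool gpu_modified = false;
    bool IsRegionGpuModified(const void*, size_t) const { return gpu_modified; }
};

struct FakeMemory {
    // Stand-in for the emulator's TryWriteBacking: in the real code this writes
    // through an alias of guest memory that is never write-protected.
    bool TryWriteBacking(void* dst, const void* src, size_t n) {
        std::memcpy(dst, src, n);
        return true;
    }
};

void InlineData(FakeTracker& tracker, FakeMemory& memory, void* address, const void* value,
                size_t num_bytes) {
    if (!tracker.IsRegionGpuModified(address, num_bytes)) {
        std::memcpy(address, value, num_bytes); // fast path, pages are writable
        return;
    }
    // Write to backing memory to bypass memory protection.
    memory.TryWriteBacking(address, value, num_bytes);
}

int main() {
    FakeTracker tracker;
    FakeMemory memory;
    uint32_t dst = 0;
    const uint32_t value = 0xCAFEBABE;
    InlineData(tracker, memory, &dst, &value, sizeof(value));
    std::cout << std::hex << dst << "\n";
}
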
@@ -319,11 +408,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
 }

 void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
-    if (!dst_gds && !IsRegionGpuModified(dst, num_bytes)) {
-        if (!src_gds && !IsRegionGpuModified(src, num_bytes)) {
+    if (!src_gds && !memory_tracker.IsRegionGpuModified(src, num_bytes)) {
+        if (!dst_gds && !memory_tracker.IsRegionGpuModified(dst, num_bytes)) {
             // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
             memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
             return;
+        } else if (!dst_gds) {
+            // Write to backing memory to bypass memory protection.
+            ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes));
         }
     }
     // Without a readback there's nothing we can do with this
     // Fallback to creating dst buffer on GPU to at least have this data there

@@ -400,35 +492,14 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds,
     });
 }

-void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
-    ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
-    if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
-        memcpy(std::bit_cast<void*>(address), value, num_bytes);
-        return;
-    }
-    Buffer* buffer = [&] {
-        if (is_gds) {
-            return &gds_buffer;
-        }
-        const BufferId buffer_id = FindBuffer(address, num_bytes);
-        return &slot_buffers[buffer_id];
-    }();
-    WriteDataBuffer(*buffer, address, value, num_bytes);
-}
-
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                   bool is_texel_buffer, BufferId buffer_id) {
-    // For small uniform buffers that have not been modified by gpu
-    // use device local stream buffer to reduce renderpass breaks.
-    // Maybe we want to modify the threshold now that the page size is 16KB?
-    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
-    const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
-    if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
+    // For small read-only buffers use device local stream buffer to reduce renderpass breaks.
+    if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) {
         const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
         return {&stream_buffer, offset};
     }
-    if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
+    if (IsBufferInvalid(buffer_id)) {
         buffer_id = FindBuffer(device_addr, size);
     }
     Buffer& buffer = slot_buffers[buffer_id];

@@ -436,17 +507,21 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
     if (is_written) {
         memory_tracker.MarkRegionAsGpuModified(device_addr, size);
         gpu_modified_ranges.Add(device_addr, size);
+        // Don't attempt to download the requested buffer if
+        // - It's a texel buffer; Most often used for image copies
+        // - It's too large; Large buffers are rarely needed by CPU
+        if (!is_texel_buffer && size <= DownloadSizeThreshold) {
+            pending_download_ranges.Add(device_addr, size);
+        }
     }
     return {&buffer, buffer.Offset(device_addr)};
 }

 std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
     // Check if any buffer contains the full requested range.
-    const u64 page = gpu_addr >> CACHING_PAGEBITS;
-    const BufferId buffer_id = page_table[page].buffer_id;
+    const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id;
     if (buffer_id) {
-        Buffer& buffer = slot_buffers[buffer_id];
-        if (buffer.IsInBounds(gpu_addr, size)) {
+        if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) {
             SynchronizeBuffer(buffer, gpu_addr, size, false);
             return {&buffer, buffer.Offset(gpu_addr)};
         }
     }

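ObtainBufferForImage above folds the lookup and bounds check into a C++17 if-with-initializer, which keeps the buffer reference scoped to the branch. A minimal sketch of the construct:

#include <cstddef>
#include <iostream>
#include <vector>

// Standalone sketch (not shadPS4 code): the if-with-initializer form used above.
struct Buffer {
    std::size_t base = 0;
    std::size_t size = 0;
    bool IsInBounds(std::size_t addr, std::size_t len) const {
        return addr >= base && addr + len <= base + size;
    }
};

int main() {
    std::vector<Buffer> slot_buffers{{0x1000, 0x100}, {0x2000, 0x400}};
    const std::size_t gpu_addr = 0x2010;
    const std::size_t size = 0x20;
    const std::size_t buffer_id = 1;

    // The reference cannot be used accidentally after the bounds check has failed.
    if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) {
        std::cout << "hit buffer at base 0x" << std::hex << buffer.base << "\n";
    } else {
        std::cout << "range not fully contained, fall back\n";
    }
}
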
@@ -456,7 +531,6 @@ std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 si
     if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
         return ObtainBuffer(gpu_addr, size, false, false);
     }
-
     // In all other cases, just do a CPU copy to the staging buffer.
     const auto [data, offset] = staging_buffer.Map(size, 16);
     memory->CopySparseMemory(gpu_addr, data, size);

@@ -474,7 +548,12 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
 }

 bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
-    return memory_tracker.IsRegionGpuModified(addr, size);
+    if (!memory_tracker.IsRegionGpuModified(addr, size)) {
+        return false;
+    }
+    bool modified = false;
+    gpu_modified_ranges.ForEachInRange(addr, size, [&](VAddr, size_t) { modified = true; });
+    return modified;
 }

 BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {

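IsRegionGpuModified now answers in two steps: the page-granular memory tracker gives a cheap reject, and the exact gpu_modified_ranges set confirms the hit, so writes to a neighbouring allocation in the same page are not misreported. A standalone sketch of that two-level check; the simplistic interval containers are illustrative only:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>

// Standalone sketch (not shadPS4 code): coarse page tracker + exact range set.
using VAddr = uint64_t;
constexpr VAddr PageSize = 0x1000;

struct PageTracker { // coarse: whole pages
    std::map<VAddr, bool> dirty_pages;
    void MarkPage(VAddr addr) { dirty_pages[addr / PageSize] = true; }
    bool IsRegionGpuModified(VAddr addr, std::size_t size) const {
        for (VAddr page = addr / PageSize; page <= (addr + size - 1) / PageSize; ++page) {
            if (dirty_pages.count(page)) {
                return true;
            }
        }
        return false;
    }
};

struct RangeSet { // exact: byte ranges, keyed by start address
    std::map<VAddr, VAddr> ranges; // start -> end
    void Add(VAddr addr, std::size_t size) { ranges[addr] = addr + size; }
    bool Overlaps(VAddr addr, std::size_t size) const {
        for (const auto& [start, end] : ranges) {
            if (start < addr + size && addr < end) {
                return true;
            }
        }
        return false;
    }
};

bool IsRegionGpuModified(const PageTracker& tracker, const RangeSet& exact, VAddr addr,
                         std::size_t size) {
    if (!tracker.IsRegionGpuModified(addr, size)) {
        return false; // cheap reject
    }
    return exact.Overlaps(addr, size); // confirm with exact ranges
}

int main() {
    PageTracker tracker;
    RangeSet exact;
    tracker.MarkPage(0x1000);
    exact.Add(0x1800, 0x100); // only part of the page was really written by the GPU
    std::cout << IsRegionGpuModified(tracker, exact, 0x1000, 0x10) << "\n"; // 0: no false positive
    std::cout << IsRegionGpuModified(tracker, exact, 0x1800, 0x10) << "\n"; // 1
}
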
@@ -814,12 +893,15 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
     u64 total_size_bytes = 0;
     VAddr buffer_start = buffer.CpuAddr();
     memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
+        //const auto add_upload = [&](VAddr start, u64 size) {
         copies.push_back(vk::BufferCopy{
             .srcOffset = total_size_bytes,
             .dstOffset = device_addr_out - buffer_start,
             .size = range_size,
         });
         total_size_bytes += range_size;
+        //};
+        //gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_upload);
     });
     SCOPE_EXIT {
         if (is_texel_buffer) {

@@ -51,6 +51,7 @@ public:

     struct PageData {
         BufferId buffer_id{};
+        u64 fence_tick;
     };

     struct Traits {

@@ -125,8 +126,8 @@ public:
     /// Performs buffer to buffer data copy on the GPU.
     void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);

-    /// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
-    void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
+    /// Schedules all GPU modified ranges since last commit to be copied back the host memory.
+    bool CommitAsyncFlushes();

     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,

@@ -170,7 +171,11 @@ private:
         });
     }

-    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
+    inline bool IsBufferInvalid(BufferId buffer_id) const {
+        return !buffer_id || slot_buffers[buffer_id].is_deleted;
+    }
+
+    void DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size);

     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);

@@ -210,6 +215,7 @@ private:
     Buffer fault_buffer;
     std::shared_mutex slot_buffers_mutex;
     Common::SlotVector<Buffer> slot_buffers;
+    RangeSet pending_download_ranges;
    RangeSet gpu_modified_ranges;
     SplitRangeMap<BufferId> buffer_ranges;
     MemoryTracker memory_tracker;

@@ -57,6 +57,15 @@ public:
         });
     }

+    /// Unmark region as modified from the host GPU
+    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
+        IteratePages<true>(dirty_cpu_addr, query_size,
+                           [](RegionManager* manager, u64 offset, size_t size) {
+                               manager->template ChangeRegionState<Type::GPU, false>(
+                                   manager->GetCpuAddr() + offset, size);
+                           });
+    }
+
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
         IteratePages<true>(query_cpu_range, query_size,

@@ -45,6 +45,10 @@ struct RangeSet {
         m_ranges_set.clear();
     }

+    bool Empty() const {
+        return m_ranges_set.empty();
+    }
+
     bool Contains(VAddr base_address, size_t size) const {
         const VAddr end_address = base_address + size;
         IntervalType interval{base_address, end_address};

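RangeSet gains an Empty() early-out (used by CommitAsyncFlushes before doing any work) and, in the next hunk, makes its interval set private. The wrapper appears to sit on top of an interval-set container; a small sketch using Boost.ICL, whose typedefs may differ from the project's actual IntervalSet/IntervalType, shows the coalescing behaviour such a set provides:

#include <cstdint>
#include <iostream>
#include <boost/icl/discrete_interval.hpp>
#include <boost/icl/interval_set.hpp>

// Standalone sketch (not shadPS4 code): an interval set coalesces adjacent ranges,
// supports a cheap empty() check, and can be cleared after a commit.
int main() {
    using VAddr = uint64_t;
    boost::icl::interval_set<VAddr> ranges;

    ranges += boost::icl::discrete_interval<VAddr>::right_open(0x1000, 0x1800);
    ranges += boost::icl::discrete_interval<VAddr>::right_open(0x1800, 0x2000); // coalesces

    std::cout << std::boolalpha << "empty: " << ranges.empty() << "\n";
    for (const auto& interval : ranges) {
        std::cout << std::hex << "[0x" << interval.lower() << ", 0x" << interval.upper() << ")\n";
    }
    ranges.clear();
    std::cout << "empty after clear: " << ranges.empty() << "\n";
}
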
@@ -110,6 +114,7 @@ struct RangeSet {
         }
     }

+private:
     IntervalSet m_ranges_set;
 };

@@ -1,8 +1,9 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-#pragma clang optimize off
 #include <boost/container/small_vector.hpp>
 #include "common/assert.h"
+#include "common/div_ceil.h"
 #include "common/debug.h"
 #include "common/signal_context.h"
 #include "core/memory.h"

@@ -60,6 +60,11 @@ void Rasterizer::CpSync() {
                            vk::DependencyFlagBits::eByRegion, ib_barrier, {}, {});
 }

+bool Rasterizer::CommitAsyncFlushes() {
+    scheduler.PopPendingOperations();
+    return buffer_cache.CommitAsyncFlushes();
+}
+
 bool Rasterizer::FilterDraw() {
     const auto& regs = liverpool->regs;
     // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an

@@ -272,6 +277,8 @@ void Rasterizer::EliminateFastClear() {
 void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     RENDERER_TRACE;

+    scheduler.PopPendingOperations();
+
     if (!FilterDraw()) {
         return;
     }

@@ -317,6 +324,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
                               u32 max_count, VAddr count_address) {
     RENDERER_TRACE;

+    scheduler.PopPendingOperations();
+
     if (!FilterDraw()) {
         return;
     }

@@ -380,6 +389,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
 void Rasterizer::DispatchDirect() {
     RENDERER_TRACE;

+    scheduler.PopPendingOperations();
+
     const auto& cs_program = liverpool->GetCsRegs();
     const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
     if (!pipeline) {

@@ -407,6 +418,8 @@ void Rasterizer::DispatchDirect() {
 void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
     RENDERER_TRACE;

+    scheduler.PopPendingOperations();
+
     const auto& cs_program = liverpool->GetCsRegs();
     const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
     if (!pipeline) {

@@ -65,6 +65,7 @@ public:
     void UnmapMemory(VAddr addr, u64 size);

     void CpSync();
+    bool CommitAsyncFlushes();
     u64 Flush();
     void Finish();
     void ProcessFaults();

@@ -65,6 +65,14 @@ void Scheduler::EndRendering() {
     current_cmdbuf.endRendering();
 }

+void Scheduler::PopPendingOperations() {
+    master_semaphore.Refresh();
+    while (!pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick)) {
+        pending_ops.front().callback();
+        pending_ops.pop();
+    }
+}
+
 void Scheduler::Flush(SubmitInfo& info) {
     // When flushing, we only send data to the driver; no waiting is necessary.
     SubmitExecution(info);

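Scheduler::PopPendingOperations above drains deferred operations whose GPU tick the timeline semaphore has already reached; the draw and dispatch entry points call it so completed readbacks land before new work is recorded. A standalone sketch of the mechanism, with a plain counter standing in for the Vulkan timeline semaphore:

#include <cstdint>
#include <functional>
#include <iostream>
#include <queue>

// Standalone sketch (not shadPS4 code): a deferred-operation queue keyed by the
// GPU tick of the submission each operation depends on.
struct PendingOp {
    std::function<void()> callback;
    uint64_t gpu_tick;
};

class Scheduler {
public:
    void DeferOperation(std::function<void()>&& func) {
        pending_ops.push({std::move(func), current_tick});
    }
    void Submit() {
        ++current_tick; // a real scheduler would submit a command buffer here
    }
    void SignalGpuProgress(uint64_t tick) {
        gpu_tick = tick; // a real scheduler refreshes this from the timeline semaphore
    }
    void PopPendingOperations() {
        while (!pending_ops.empty() && pending_ops.front().gpu_tick <= gpu_tick) {
            pending_ops.front().callback();
            pending_ops.pop();
        }
    }

private:
    std::queue<PendingOp> pending_ops;
    uint64_t current_tick = 1; // tick the next submission will signal
    uint64_t gpu_tick = 0;     // last tick the GPU has completed
};

int main() {
    Scheduler scheduler;
    scheduler.DeferOperation([] { std::cout << "copy results back to guest memory\n"; });
    scheduler.Submit();

    scheduler.PopPendingOperations(); // GPU not there yet, nothing runs
    scheduler.SignalGpuProgress(1);
    scheduler.PopPendingOperations(); // now the readback callback fires
}
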
@@ -95,10 +103,7 @@ void Scheduler::Wait(u64 tick) {
     // We don't currently sync the GPU, and some games are very sensitive to this.
     // If this becomes a problem, it can be commented out.
     // Idealy we would implement proper gpu sync.
-    while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
-        pending_ops.front().callback();
-        pending_ops.pop();
-    }
+    PopPendingOperations();
 }

 void Scheduler::AllocateWorkerCommandBuffers() {

@@ -174,11 +179,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
     master_semaphore.Refresh();
     AllocateWorkerCommandBuffers();

-    // Apply pending operations
-    while (!pending_ops.empty() && IsFree(pending_ops.front().gpu_tick)) {
-        pending_ops.front().callback();
-        pending_ops.pop();
-    }
+    PopPendingOperations();
 }

 void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) {

@@ -323,6 +323,9 @@ public:
     /// Ends current rendering scope.
     void EndRendering();

+    /// Attempts to execute pending operations whose tick the GPU has caught up with.
+    void PopPendingOperations();
+
     /// Returns the current render state.
     const RenderState& GetRenderState() const {
         return render_state;

@@ -354,8 +357,8 @@ public:
     }

     /// Defers an operation until the gpu has reached the current cpu tick.
-    void DeferOperation(Common::UniqueFunction<void>&& func) {
-        pending_ops.emplace(std::move(func), CurrentTick());
+    void DeferOperation(Common::UniqueFunction<void>&& func, bool prev_tick = false) {
+        pending_ops.emplace(std::move(func), prev_tick ? CurrentTick() - 1 : CurrentTick());
     }

     static std::mutex submit_mutex;

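DeferOperation's new prev_tick flag appears intended to key an operation to the previously submitted batch, so PopPendingOperations can run it as soon as that batch retires instead of waiting for the still-open command buffer to be flushed and completed. A tiny sketch of the tick selection, with an illustrative helper name:

#include <cstdint>

// Standalone sketch (not shadPS4 code): pick the tick a deferred operation waits on.
constexpr uint64_t TickForDeferredOp(uint64_t current_tick, bool prev_tick) {
    // prev_tick = true keys the callback to the last batch already submitted.
    return prev_tick ? current_tick - 1 : current_tick;
}

static_assert(TickForDeferredOp(10, false) == 10);
static_assert(TickForDeferredOp(10, true) == 9);

int main() {}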