mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 18:45:36 +00:00
texture_cache: Implement color to depth copies and vise versa
This commit is contained in:
parent
9981c8df03
commit
2b3eef0114
@ -23,6 +23,7 @@ static constexpr size_t DataShareBufferSize = 64_KB;
|
||||
static constexpr size_t StagingBufferSize = 512_MB;
|
||||
static constexpr size_t UboStreamBufferSize = 128_MB;
|
||||
static constexpr size_t DownloadBufferSize = 128_MB;
|
||||
static constexpr size_t DeviceBufferSize = 16_MB;
|
||||
static constexpr size_t MaxPageFaults = 1024;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
@ -32,7 +33,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
|
||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||
download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
|
||||
download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
|
||||
device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize},
|
||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
||||
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
|
||||
0, AllFlags, BDA_PAGETABLE_SIZE},
|
||||
@ -348,7 +350,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
|
||||
return {&buffer, buffer.Offset(device_addr)};
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
|
||||
// Check if any buffer contains the full requested range.
|
||||
const u64 page = gpu_addr >> CACHING_PAGEBITS;
|
||||
const BufferId buffer_id = page_table[page].buffer_id;
|
||||
@ -361,10 +363,10 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
|
||||
}
|
||||
// If no buffer contains the full requested range but some buffer within was GPU-modified,
|
||||
// fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
|
||||
// This is only done if the request prefers to use GPU memory, otherwise we can skip it.
|
||||
if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
|
||||
if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
|
||||
return ObtainBuffer(gpu_addr, size, false, false);
|
||||
}
|
||||
|
||||
// In all other cases, just do a CPU copy to the staging buffer.
|
||||
const auto [data, offset] = staging_buffer.Map(size, 16);
|
||||
memory->CopySparseMemory(gpu_addr, data, size);
|
||||
|
@ -80,11 +80,6 @@ public:
|
||||
return &gds_buffer;
|
||||
}
|
||||
|
||||
/// Retrieves the host visible device local stream buffer.
|
||||
[[nodiscard]] StreamBuffer& GetStreamBuffer() noexcept {
|
||||
return stream_buffer;
|
||||
}
|
||||
|
||||
/// Retrieves the device local DBA page table buffer.
|
||||
[[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
|
||||
return &bda_pagetable_buffer;
|
||||
@ -100,6 +95,20 @@ public:
|
||||
return slot_buffers[id];
|
||||
}
|
||||
|
||||
/// Retrieves a utility buffer optimized for specified memory usage.
|
||||
StreamBuffer& GetUtilityBuffer(MemoryUsage usage) noexcept {
|
||||
switch (usage) {
|
||||
case MemoryUsage::Stream:
|
||||
return stream_buffer;
|
||||
case MemoryUsage::Download:
|
||||
return download_buffer;
|
||||
case MemoryUsage::Upload:
|
||||
return staging_buffer;
|
||||
case MemoryUsage::DeviceLocal:
|
||||
return device_buffer;
|
||||
}
|
||||
}
|
||||
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
|
||||
|
||||
@ -121,8 +130,7 @@ public:
|
||||
BufferId buffer_id = {});
|
||||
|
||||
/// Attempts to obtain a buffer without modifying the cache contents.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
|
||||
bool prefer_gpu);
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBufferForImage(VAddr gpu_addr, u32 size);
|
||||
|
||||
/// Return true when a region is registered on the cache
|
||||
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
|
||||
@ -193,6 +201,7 @@ private:
|
||||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
StreamBuffer download_buffer;
|
||||
StreamBuffer device_buffer;
|
||||
Buffer gds_buffer;
|
||||
Buffer bda_pagetable_buffer;
|
||||
Buffer fault_buffer;
|
||||
|
@ -549,7 +549,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
const auto* gds_buf = buffer_cache.GetGdsBuffer();
|
||||
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
|
||||
} else if (desc.buffer_type == Shader::BufferType::Flatbuf) {
|
||||
auto& vk_buffer = buffer_cache.GetStreamBuffer();
|
||||
auto& vk_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
|
||||
const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
|
||||
const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
|
||||
instance.UniformMinAlignment());
|
||||
@ -561,7 +561,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
const auto* fault_buffer = buffer_cache.GetFaultBuffer();
|
||||
buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
|
||||
} else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
|
||||
auto& lds_buffer = buffer_cache.GetStreamBuffer();
|
||||
auto& lds_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
|
||||
const auto [data, offset] =
|
||||
|
@ -312,43 +312,121 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
}
|
||||
|
||||
void Image::CopyImage(const Image& image) {
|
||||
void Image::CopyImage(const Image& src_image) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
const auto& src_info = src_image.info;
|
||||
|
||||
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
|
||||
const u32 num_mips = std::min(image.info.resources.levels, info.resources.levels);
|
||||
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
|
||||
for (u32 m = 0; m < num_mips; ++m) {
|
||||
const auto mip_w = std::max(image.info.size.width >> m, 1u);
|
||||
const auto mip_h = std::max(image.info.size.height >> m, 1u);
|
||||
const auto mip_d = std::max(image.info.size.depth >> m, 1u);
|
||||
const auto mip_w = std::max(src_info.size.width >> m, 1u);
|
||||
const auto mip_h = std::max(src_info.size.height >> m, 1u);
|
||||
const auto mip_d = std::max(src_info.size.depth >> m, 1u);
|
||||
|
||||
image_copy.emplace_back(vk::ImageCopy{
|
||||
.srcSubresource{
|
||||
.aspectMask = image.aspect_mask,
|
||||
.aspectMask = src_image.aspect_mask,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = image.info.resources.layers,
|
||||
.layerCount = src_info.resources.layers,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = image.aspect_mask,
|
||||
.aspectMask = src_image.aspect_mask,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = image.info.resources.layers,
|
||||
.layerCount = src_info.resources.layers,
|
||||
},
|
||||
.extent = {mip_w, mip_h, mip_d},
|
||||
});
|
||||
}
|
||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
||||
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||
image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
}
|
||||
|
||||
void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
|
||||
void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) {
|
||||
const auto& src_info = src_image.info;
|
||||
|
||||
vk::BufferImageCopy buffer_image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = src_info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth
|
||||
: vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset =
|
||||
{
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
.z = 0,
|
||||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = src_info.size.width,
|
||||
.height = src_info.size.height,
|
||||
.depth = src_info.size.depth,
|
||||
},
|
||||
};
|
||||
|
||||
const vk::BufferMemoryBarrier2 pre_copy_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
};
|
||||
|
||||
const vk::BufferMemoryBarrier2 post_copy_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
};
|
||||
|
||||
scheduler->EndRendering();
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_copy_barrier,
|
||||
});
|
||||
|
||||
cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer,
|
||||
buffer_image_copy);
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_copy_barrier,
|
||||
});
|
||||
|
||||
buffer_image_copy.imageSubresource.aspectMask =
|
||||
info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eColor;
|
||||
|
||||
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
|
||||
buffer_image_copy);
|
||||
}
|
||||
|
||||
void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
@ -358,26 +436,27 @@ void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
|
||||
const auto mip_h = std::max(info.size.height >> mip, 1u);
|
||||
const auto mip_d = std::max(info.size.depth >> mip, 1u);
|
||||
|
||||
ASSERT(mip_w == image.info.size.width);
|
||||
ASSERT(mip_h == image.info.size.height);
|
||||
const auto& src_info = src_image.info;
|
||||
ASSERT(mip_w == src_info.size.width);
|
||||
ASSERT(mip_h == src_info.size.height);
|
||||
|
||||
const u32 num_layers = std::min(image.info.resources.layers, info.resources.layers);
|
||||
const u32 num_layers = std::min(src_info.resources.layers, info.resources.layers);
|
||||
const vk::ImageCopy image_copy{
|
||||
.srcSubresource{
|
||||
.aspectMask = image.aspect_mask,
|
||||
.aspectMask = src_image.aspect_mask,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = image.aspect_mask,
|
||||
.aspectMask = src_image.aspect_mask,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = slice,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.extent = {mip_w, mip_h, mip_d},
|
||||
};
|
||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
||||
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||
image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
|
@ -104,7 +104,8 @@ struct Image {
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
||||
void Upload(vk::Buffer buffer, u64 offset);
|
||||
|
||||
void CopyImage(const Image& image);
|
||||
void CopyImage(const Image& src_image);
|
||||
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
|
||||
void CopyMip(const Image& src_image, u32 mip, u32 slice);
|
||||
|
||||
bool IsTracked() {
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "common/debug.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/page_manager.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/host_compatibility.h"
|
||||
@ -126,7 +125,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
|
||||
ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
|
||||
ImageId cache_image_id) {
|
||||
const auto& cache_image = slot_images[cache_image_id];
|
||||
auto& cache_image = slot_images[cache_image_id];
|
||||
|
||||
if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
|
||||
return {};
|
||||
@ -169,18 +168,21 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
|
||||
}
|
||||
|
||||
if (recreate) {
|
||||
auto new_info{requested_info};
|
||||
new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
|
||||
new_info.UpdateSize();
|
||||
auto new_info = requested_info;
|
||||
new_info.resources = std::min(requested_info.resources, cache_image.info.resources);
|
||||
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
|
||||
RegisterImage(new_image_id);
|
||||
|
||||
// Inherit image usage
|
||||
auto& new_image = GetImage(new_image_id);
|
||||
auto& new_image = slot_images[new_image_id];
|
||||
new_image.usage = cache_image.usage;
|
||||
new_image.flags &= ~ImageFlagBits::Dirty;
|
||||
|
||||
// TODO: perform a depth copy here
|
||||
// Perform depth<->color copy using the intermediate copy buffer.
|
||||
const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
|
||||
new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
|
||||
|
||||
// Free the cache image.
|
||||
FreeImage(cache_image_id);
|
||||
return new_image_id;
|
||||
}
|
||||
@ -395,6 +397,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
|
||||
// Create and register a new image
|
||||
if (!image_id) {
|
||||
image_id = slot_images.insert(instance, scheduler, info);
|
||||
RefreshImage(slot_images[image_id]);
|
||||
RegisterImage(image_id);
|
||||
}
|
||||
|
||||
@ -580,16 +583,14 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
}
|
||||
|
||||
auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
sched_ptr->EndRendering();
|
||||
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size;
|
||||
const auto [vk_buffer, buf_offset] =
|
||||
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
|
||||
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size);
|
||||
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
|
||||
// hazard
|
||||
// The obtained buffer may be GPU modified so we need to emit a barrier to prevent RAW hazard
|
||||
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
|
Loading…
Reference in New Issue
Block a user