From 55ccec4a387dcf1bf3dcf11f55f2ce546e88a173 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 2 Jan 2025 18:40:10 +0100 Subject: [PATCH 1/6] fix typos --- src/core/libraries/kernel/memory.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index b18d5f570..8deefb496 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -505,13 +505,13 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) { return result; } -static constexpr int MAX_PTR_APERTURES = 3; +static constexpr int MAX_PRT_APERTURES = 3; static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000; static constexpr size_t PRT_AREA_SIZE = 0xec00000000; -static std::array, MAX_PTR_APERTURES> PrtApertures{}; +static std::array, MAX_PRT_APERTURES> PrtApertures{}; int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { - if (id < 0 || id >= MAX_PTR_APERTURES) { + if (id < 0 || id >= MAX_PRT_APERTURES) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -531,12 +531,12 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { return ORBIS_OK; } -int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* addres, size_t* size) { - if (id < 0 || id >= MAX_PTR_APERTURES) { +int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* address, size_t* size) { + if (id < 0 || id >= MAX_PRT_APERTURES) { return ORBIS_KERNEL_ERROR_EINVAL; } - std::tie(*addres, *size) = PrtApertures[id]; + std::tie(*address, *size) = PrtApertures[id]; return ORBIS_OK; } From f7a8e2409c38a5f49fcf3cd0db1bdbf26ff69d7f Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 2 Jan 2025 19:41:15 +0100 Subject: [PATCH 2/6] hot-fix: debug build --- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 97825b1e1..690d26cfc 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -697,8 +697,8 @@ static const size_t amd_gpu_number_format_bit_size = 4; // All values are under static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - DEBUG_ASSERT(data_format < 1 << amd_gpu_data_format_bit_size); - DEBUG_ASSERT(num_format < 1 << amd_gpu_number_format_bit_size); + DEBUG_ASSERT(u32(data_format) < 1 << amd_gpu_data_format_bit_size); + DEBUG_ASSERT(u32(num_format) < 1 << amd_gpu_number_format_bit_size); size_t result = static_cast(num_format) | (static_cast(data_format) << amd_gpu_number_format_bit_size); return result; From 77d217244142c11e01a960da4558ae122e20b3a8 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:43:56 +0200 Subject: [PATCH 3/6] renderer_vulkan: Cleanup and improve barriers in caches (#1865) * texture_cache: Stricter barriers on image upload * buffer_cache: Stricter barrier for vkCmdUpdateBuffer * vk_rasterizer: Barrier also normal buffers and make it apply to all stages * texture_cache: Minor barrier cleanup * Batch image and buffer barriers in a single command * clang format --- src/video_core/buffer_cache/buffer_cache.cpp | 147 ++++++++++++++---- .../renderer_vulkan/vk_rasterizer.cpp | 8 +- .../texture_cache/texture_cache.cpp | 45 +++++- src/video_core/texture_cache/tile_manager.cpp | 59 +++---- src/video_core/texture_cache/tile_manager.h | 7 +- 5 files changed, 190 insertions(+), 76 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 3ac6a3598..3e43b4fbc 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -259,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 post_barrier = { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, @@ -271,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &pre_barrier, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -465,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }; scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const std::array pre_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const std::array post_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = new_buffer.Handle(), + .offset = dst_base_offset, + .size = overlap.SizeBytes(), + }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = pre_barriers.data(), + }); + cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = static_cast(post_barriers.size()), + .pBufferMemoryBarriers = post_barriers.data(), + }); DeleteBuffer(overlap_id); } @@ -583,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + }); cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { @@ -647,10 +702,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eTransfer, {}); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(), copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4384cdbea..6e628239b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -562,6 +562,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eAllCommands)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ @@ -600,7 +606,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding if (auto barrier = vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { + vk::PipelineStageFlagBits2::eAllCommands)) { buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 897d6f67e..291e1da7c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -542,31 +542,62 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, - cmdbuf); - const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); + // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { - const auto dependencies = vk::DependencyInfo{ + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &barrier.value(), - }; - cmdbuf.pipelineBarrier2(dependencies); + }); } - const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image); + const auto [buffer, offset] = + tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); for (auto& copy : image_copy) { copy.bufferOffset += offset; } + const vk::BufferMemoryBarrier2 pre_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const vk::BufferMemoryBarrier2 post_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const auto image_barriers = + image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, + vk::PipelineStageFlagBits2::eTransfer, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); image.flags &= ~ImageFlagBits::Dirty; } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index fda7e511a..c1243dafb 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -4,6 +4,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" @@ -86,10 +87,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) { return format; } -const DetilerContext* TileManager::GetDetiler(const Image& image) const { - const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); +const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { + const auto format = DemoteImageFormatForDetiling(info.pixel_format); - switch (image.info.tiling_mode) { + switch (info.tiling_mode) { case AmdGpu::TilingMode::Texture_MicroTiled: switch (format) { case vk::Format::eR8Uint: @@ -258,23 +259,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) { } std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset, - Image& image) { - if (!image.info.props.is_tiled) { + const ImageInfo& info) { + if (!info.props.is_tiled) { return {in_buffer, in_offset}; } - const auto* detiler = GetDetiler(image); + const auto* detiler = GetDetiler(info); if (!detiler) { - if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { + if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", - vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); + vk::to_string(info.pixel_format), NameOf(info.tiling_mode)); } return {in_buffer, in_offset}; } - const u32 image_size = image.info.guest_size_bytes; + const u32 image_size = info.guest_size_bytes; // Prepare output buffer auto out_buffer = AllocBuffer(image_size, true); @@ -317,22 +318,21 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o set_writes); DetilerParams params; - params.num_levels = image.info.resources.levels; - params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); - params.height = image.info.size.height; - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { - ASSERT(image.info.resources.levels == 1); - ASSERT(image.info.num_bits >= 32); - const auto tiles_per_row = image.info.pitch / 8u; - const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u); + params.num_levels = info.resources.levels; + params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u); + params.height = info.size.height; + if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + ASSERT(info.resources.levels == 1); + ASSERT(info.num_bits >= 32); + const auto tiles_per_row = info.pitch / 8u; + const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); params.sizes[0] = tiles_per_row; params.sizes[1] = tiles_per_slice; } else { - - ASSERT(image.info.resources.levels <= 14); + ASSERT(info.resources.levels <= 14); std::memset(¶ms.sizes, 0, sizeof(params.sizes)); - for (int m = 0; m < image.info.resources.levels; ++m) { - params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + for (int m = 0; m < info.resources.levels; ++m) { + params.sizes[m] = info.mips_layout[m].size * info.resources.layers + (m > 0 ? params.sizes[m - 1] : 0); } } @@ -341,20 +341,9 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o ¶ms); ASSERT((image_size % 64) == 0); - const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); + const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u); const auto num_tiles = image_size / (64 * (bpp / 8)); cmdbuf.dispatch(num_tiles, 1, 1); - - const vk::BufferMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eShaderWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .buffer = out_buffer.first, - .size = image_size, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, post_barrier, {}); - return {out_buffer.first, 0}; } diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 72860bca0..1d731d2f2 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -5,11 +5,11 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/texture_cache/image.h" namespace VideoCore { class TextureCache; +struct ImageInfo; enum DetilerType : u32 { Micro8x1, @@ -36,14 +36,15 @@ public: TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image); + std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, + const ImageInfo& info); ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); void Upload(ScratchBuffer buffer, const void* data, size_t size); void FreeBuffer(ScratchBuffer buffer); private: - const DetilerContext* GetDetiler(const Image& image) const; + const DetilerContext* GetDetiler(const ImageInfo& info) const; private: const Vulkan::Instance& instance; From 67c531298a6c6c56a2ba82251fd4f04487601bc8 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:51:47 -0600 Subject: [PATCH 4/6] Fixup returns (#2023) On a signed out console, these two functions return ERROR_SIGNED_OUT. --- src/core/libraries/np_manager/np_manager.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index ec9cc6bf5..87d752c69 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -972,11 +972,8 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { } int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) { - LOG_INFO(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(np_id, 0, sizeof(OrbisNpId)); - name.copy(np_id->handle.data, sizeof(np_id->handle.data)); - return ORBIS_OK; + LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetNpReachabilityState() { @@ -986,10 +983,7 @@ int PS4_SYSV_ABI sceNpGetNpReachabilityState() { int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) { LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(online_id, 0, sizeof(OrbisNpOnlineId)); - name.copy(online_id->data, sizeof(online_id->data)); - return ORBIS_OK; + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetParentalControlInfo() { From dcc662ff1afa4a7aad8f50260b64c2e03a0dd2e6 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:52:10 +0200 Subject: [PATCH 5/6] ir_passes: Integrate DS barriers in block (#2020) --- .../frontend/translate/data_share.cpp | 6 -- .../frontend/translate/translate.h | 1 - .../ir/passes/shared_memory_barrier_pass.cpp | 71 ++++++++++++++----- 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 4408cae28..62c0423dd 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } - emit_ds_read_barrier = true; } void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { @@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { - if (emit_ds_read_barrier && profile.needs_lds_barriers) { - ir.Barrier(); - emit_ds_read_barrier = false; - } - const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index e8584ec2f..9da0844e4 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -308,7 +308,6 @@ private: const RuntimeInfo& runtime_info; const Profile& profile; bool opcode_missing = false; - bool emit_ds_read_barrier = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index aad8fb148..ec7d7e986 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -8,6 +8,54 @@ namespace Shader::Optimization { +static void EmitBarrierInBlock(IR::Block* block) { + // This is inteded to insert a barrier when shared memory write and read + // occur in the same basic block. Also checks if branch depth is zero as + // we don't want to insert barrier in potentially divergent code. + bool emit_barrier_on_write = false; + bool emit_barrier_on_read = false; + const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) { + if (emit_cond) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Barrier(); + emit_cond = false; + } + }; + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 || + inst.GetOpcode() == IR::Opcode::LoadSharedU64) { + emit_barrier(emit_barrier_on_read, inst); + emit_barrier_on_write = true; + } + if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 || + inst.GetOpcode() == IR::Opcode::WriteSharedU64) { + emit_barrier(emit_barrier_on_write, inst); + emit_barrier_on_read = true; + } + } +} + +static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) { + // Insert a barrier after divergent conditional blocks. + // This avoids potential softlocks and crashes when some threads + // initialize shared memory and others read from it. + const IR::U1 cond = data.if_node.cond; + const auto insert_barrier = + IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = data.if_node.merge; + auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } +} + void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { if (!program.info.uses_shared || !profile.needs_lds_barriers) { return; @@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { --branch_depth; continue; } - if (node.type != Type::If) { + if (node.type == Type::If && branch_depth++ == 0) { + EmitBarrierInMergeBlock(node.data); continue; } - u32 curr_depth = branch_depth++; - if (curr_depth != 0) { - continue; - } - const IR::U1 cond = node.data.if_node.cond; - const auto insert_barrier = - IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && - inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { - return true; - } - return std::nullopt; - }); - if (insert_barrier) { - IR::Block* const merge = node.data.if_node.merge; - auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); - IR::IREmitter ir{*merge, insert_point}; - ir.Barrier(); + if (node.type == Type::Block && branch_depth == 0) { + EmitBarrierInBlock(node.data.block); } } } From 4e1733222f4b42d4292d6c7417f215c109cd51d3 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Thu, 2 Jan 2025 23:21:43 +0200 Subject: [PATCH 6/6] fixed deadzones (#2025) --- src/core/libraries/pad/pad.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index 27564294e..7eb628a90 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -104,8 +104,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn pInfo->touchPadInfo.pixelDensity = 1; pInfo->touchPadInfo.resolution.x = 1920; pInfo->touchPadInfo.resolution.y = 950; - pInfo->stickInfo.deadZoneLeft = 20; - pInfo->stickInfo.deadZoneRight = 20; + pInfo->stickInfo.deadZoneLeft = 2; + pInfo->stickInfo.deadZoneRight = 2; pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; pInfo->connectedCount = 1; pInfo->connected = true;