diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 7c162de88..fb7dffa39 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -233,13 +233,8 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, } if (!is_eop) { - // Before processing the flip we need to ask GPU thread to flush command list as at this - // point VO surface is ready to be presented, and we will need have an actual state of - // Vulkan image at the time of frame presentation. - liverpool->SendCommand([=, this]() { - presenter->FlushDraw(); - SubmitFlipInternal(port, index, flip_arg, is_eop); - }); + // Non EOP flips can arrive from any thread so ask GPU thread to perform them + liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); }); } else { SubmitFlipInternal(port, index, flip_arg, is_eop); } @@ -247,15 +242,14 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, return true; } -void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, - bool is_eop /*= false*/) { +void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) { Vulkan::Frame* frame; if (index == -1) { - frame = presenter->PrepareBlankFrame(is_eop); + frame = presenter->PrepareBlankFrame(false); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; - frame = presenter->PrepareFrame(group, buffer.address_left, is_eop); + frame = presenter->PrepareFrame(group, buffer.address_left); } std::scoped_lock lock{mutex}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index a5cdca8f1..2f01d00db 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -301,7 +301,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } - if (runtime_info.fs_info.addr_flags.linear_sample_ena || + if (info.loads.Get(IR::Attribute::SampleIndex) || + runtime_info.fs_info.addr_flags.linear_sample_ena || runtime_info.fs_info.addr_flags.persp_sample_ena) { ctx.AddCapability(spv::Capability::SampleRateShading); } diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 2fd0556a2..442a9af2f 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "common/types.h" #include "shader_recompiler/info.h" diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 38aad55c4..b9b4e9726 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -1022,7 +1022,7 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) { - converted = ApplyForceDegamma(ir, texel, image.DstSelect()); + converted = ApplyForceDegamma(ir, texel); } inst.ReplaceUsesWith(converted); } diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index 84a4a51d5..349ce448f 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -29,25 +29,15 @@ inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) { return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)}; } -inline Value ApplyForceDegamma(IREmitter& ir, const Value& value, - const AmdGpu::CompMapping& mapping) { +inline Value ApplyForceDegamma(IREmitter& ir, const Value& value) { auto x = F32{ir.CompositeExtract(value, 0)}; auto y = F32{ir.CompositeExtract(value, 1)}; auto z = F32{ir.CompositeExtract(value, 2)}; auto w = F32{ir.CompositeExtract(value, 3)}; // Gamma correction is only applied to RGB components - if (AmdGpu::IsRgb(mapping.r)) { - x = ApplyGammaToLinear(ir, x); - } - if (AmdGpu::IsRgb(mapping.g)) { - y = ApplyGammaToLinear(ir, y); - } - if (AmdGpu::IsRgb(mapping.b)) { - z = ApplyGammaToLinear(ir, z); - } - if (AmdGpu::IsRgb(mapping.a)) { - w = ApplyGammaToLinear(ir, w); - } + x = ApplyGammaToLinear(ir, x); + y = ApplyGammaToLinear(ir, y); + z = ApplyGammaToLinear(ir, z); return ir.CompositeConstruct(x, y, z, w); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 52bbd277b..10d4648b7 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1485,26 +1485,6 @@ struct Liverpool { return nullptr; } - u32 NumSamples() const { - // It seems that the number of samples > 1 set in the AA config doesn't mean we're - // always rendering with MSAA, so we need to derive MS ratio from the CB and DB - // settings. - u32 num_samples = 1u; - if (color_control.mode != ColorControl::OperationMode::Disable) { - for (auto cb = 0u; cb < NumColorBuffers; ++cb) { - const auto& col_buf = color_buffers[cb]; - if (!col_buf) { - continue; - } - num_samples = std::max(num_samples, col_buf.NumSamples()); - } - } - if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) { - num_samples = std::max(num_samples, depth_buffer.NumSamples()); - } - return num_samples; - } - bool IsClipDisabled() const { return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList; } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e3cb6cc81..6fabd5d10 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -984,44 +984,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, if (copy_size == 0) { return false; } - scheduler.EndRendering(); - const vk::BufferMemoryBarrier2 pre_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer.Handle(), - .offset = buf_offset, - .size = copy_size, - }; - const vk::BufferMemoryBarrier2 post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = buffer.Handle(), - .offset = buf_offset, - .size = copy_size, - }; - auto barriers = - image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, - vk::PipelineStageFlagBits2::eTransfer, {}); - auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &pre_barrier, - .imageMemoryBarrierCount = static_cast(barriers.size()), - .pImageMemoryBarriers = barriers.data(), - }); auto& tile_manager = texture_cache.GetTileManager(); - tile_manager.TileImage(image.image, buffer_copies, buffer.Handle(), buf_offset, image.info); - cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &post_barrier, - }); + tile_manager.TileImage(image, buffer_copies, buffer.Handle(), buf_offset, copy_size); return true; } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 486bc51dc..4aae78776 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,6 +12,7 @@ set(SHADER_FILES detilers/micro_64bpp.comp detilers/micro_8bpp.comp color_to_ms_depth.frag + ms_image_blit.frag fault_buffer_process.comp fs_tri.vert fsr.comp diff --git a/src/video_core/host_shaders/fs_tri.vert b/src/video_core/host_shaders/fs_tri.vert index 7b82c11a9..d6ea55ad2 100644 --- a/src/video_core/host_shaders/fs_tri.vert +++ b/src/video_core/host_shaders/fs_tri.vert @@ -3,6 +3,10 @@ #version 450 +#if defined(INSTANCE_AS_LAYER) +#extension GL_ARB_shader_viewport_layer_array : require +#endif + layout(location = 0) out vec2 uv; void main() { @@ -11,5 +15,8 @@ void main() { float((gl_VertexIndex & 2u) << 1u) ); gl_Position = vec4(pos - vec2(1.0, 1.0), 0.0, 1.0); +#if defined(INSTANCE_AS_LAYER) + gl_Layer = gl_InstanceIndex; +#endif uv = pos * 0.5; } diff --git a/src/video_core/host_shaders/ms_image_blit.frag b/src/video_core/host_shaders/ms_image_blit.frag new file mode 100644 index 000000000..44811dcf1 --- /dev/null +++ b/src/video_core/host_shaders/ms_image_blit.frag @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +#extension GL_EXT_samplerless_texture_functions : require + +#if defined(SRC_MSAA) +layout (binding = 0, set = 0) uniform texture2DMS in_tex; +#else +layout (binding = 0, set = 0) uniform texture2D in_tex; +#endif + +layout (location = 0) in vec2 uv; +layout (location = 0) out vec4 out_color; + +void main() +{ +#if defined(SRC_MSAA) + out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), gl_SampleID); +#else + out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), 0); +#endif +} diff --git a/src/video_core/renderer_vulkan/host_passes/fsr_pass.cpp b/src/video_core/renderer_vulkan/host_passes/fsr_pass.cpp index 1c54207e0..8f58f3499 100644 --- a/src/video_core/renderer_vulkan/host_passes/fsr_pass.cpp +++ b/src/video_core/renderer_vulkan/host_passes/fsr_pass.cpp @@ -1,10 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "fsr_pass.h" - #include "common/assert.h" +#include "common/config.h" #include "video_core/host_shaders/fsr_comp.h" +#include "video_core/renderer_vulkan/host_passes/fsr_pass.h" #include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -164,6 +164,12 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input, CreateImages(img); } + if (Config::getVkHostMarkersEnabled()) { + cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = "Host/FSR", + }); + } + static const int thread_group_work_region_dim = 16; int dispatch_x = (width + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim; int dispatch_y = (height + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim; @@ -381,6 +387,10 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input, .pImageMemoryBarriers = return_barrier.data(), }); + if (Config::getVkHostMarkersEnabled()) { + cmdbuf.endDebugUtilsLabelEXT(); + } + return img.output_image_view.get(); } @@ -442,4 +452,4 @@ void FsrPass::CreateImages(Img& img) const { SetObjectName(device, img.output_image_view.get(), "FSR Output ImageView #{}", img.id); } -} // namespace Vulkan::HostPasses \ No newline at end of file +} // namespace Vulkan::HostPasses diff --git a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp index 73dd3a7b5..5c1fb4638 100644 --- a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp +++ b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp @@ -1,9 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "pp_pass.h" +#include "video_core/renderer_vulkan/host_passes/pp_pass.h" #include "common/assert.h" +#include "common/config.h" #include "video_core/host_shaders/fs_tri_vert.h" #include "video_core/host_shaders/post_process_frag.h" #include "video_core/renderer_vulkan/vk_platform.h" @@ -187,6 +188,17 @@ void PostProcessingPass::Create(vk::Device device, const vk::Format surface_form void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input, vk::Extent2D input_size, Frame& frame, Settings settings) { + if (Config::getVkHostMarkersEnabled()) { + cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = "Host/Post processing", + }); + } + + constexpr vk::ImageSubresourceRange simple_subresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }; const std::array attachments{{ { .imageView = frame.image_view, @@ -250,6 +262,26 @@ void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input, cmdbuf.beginRendering(rendering_info); cmdbuf.draw(3, 1, 0, 0); cmdbuf.endRendering(); + + const auto post_barrier = vk::ImageMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .image = frame.image, + .subresourceRange = simple_subresource, + }; + + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &post_barrier, + }); + + if (Config::getVkHostMarkersEnabled()) { + cmdbuf.endDebugUtilsLabelEXT(); + } } -} // namespace Vulkan::HostPasses \ No newline at end of file +} // namespace Vulkan::HostPasses diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 72b7e8d9b..2c910888b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,8 +135,8 @@ GraphicsPipeline::GraphicsPipeline( } const vk::PipelineMultisampleStateCreateInfo multisampling = { - .rasterizationSamples = - LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()), + .rasterizationSamples = LiverpoolToVK::NumSamples( + key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()), .sampleShadingEnable = fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, }; @@ -259,7 +259,20 @@ GraphicsPipeline::GraphicsPipeline( color_formats[i] = color_format; } + std::array color_samples; + std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) { + return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts()) + : vk::SampleCountFlagBits::e1; + }); + const vk::AttachmentSampleCountInfoAMD mixed_samples = { + .colorAttachmentCount = key.num_color_attachments, + .pColorAttachmentSamples = color_samples.data(), + .depthStencilAttachmentSamples = + LiverpoolToVK::NumSamples(key.depth_samples, instance.GetDepthSampleCounts()), + }; + const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = { + .pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr, .colorAttachmentCount = key.num_color_attachments, .pColorAttachmentFormats = color_formats.data(), .depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 9a3199de9..4786c43ca 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -41,7 +41,9 @@ struct GraphicsPipelineKey { std::array write_masks; Liverpool::ColorBufferMask cb_shader_mask; Liverpool::ColorControl::LogicOp logic_op; - u32 num_samples; + u8 num_samples; + u8 depth_samples; + std::array color_samples; u32 mrt_mask; struct { Liverpool::DepthBuffer::ZFormat z_format : 2; @@ -80,12 +82,8 @@ public: return fetch_shader; } - auto GetWriteMasks() const { - return key.write_masks; - } - - u32 GetMrtMask() const { - return key.mrt_mask; + const GraphicsPipelineKey& GetGraphicsKey() const { + return key; } /// Gets the attributes and bindings for vertex inputs. diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 5206edbec..ca7d09c52 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -297,6 +297,8 @@ bool Instance::CreateDevice() { image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME); amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME); + nv_framebuffer_mixed_samples = add_extension(VK_NV_FRAMEBUFFER_MIXED_SAMPLES_EXTENSION_NAME); + amd_mixed_attachment_samples = add_extension(VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME); shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME); if (shader_atomic_float2) { shader_atomic_float2_features = diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d7d434e54..2a8bd3c82 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -239,6 +239,17 @@ public: workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess; } + /// Returns true if VK_NV_framebuffer_mixed_samples or + /// VK_AMD_mixed_attachment_samples is supported + bool IsMixedDepthSamplesSupported() const { + return nv_framebuffer_mixed_samples || amd_mixed_attachment_samples; + } + + /// Returns true if VK_AMD_mixed_attachment_samples is supported + bool IsMixedAnySamplesSupported() const { + return amd_mixed_attachment_samples; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -389,10 +400,14 @@ public: return properties.limits.maxFramebufferHeight; } - /// Returns the sample count flags supported by framebuffers. - vk::SampleCountFlags GetFramebufferSampleCounts() const { - return properties.limits.framebufferColorSampleCounts & - properties.limits.framebufferDepthSampleCounts & + /// Returns the sample count flags supported by color buffers. + vk::SampleCountFlags GetColorSampleCounts() const { + return properties.limits.framebufferColorSampleCounts; + } + + /// Returns the sample count flags supported by depth buffer. + vk::SampleCountFlags GetDepthSampleCounts() const { + return properties.limits.framebufferDepthSampleCounts & properties.limits.framebufferStencilSampleCounts; } @@ -481,6 +496,8 @@ private: bool image_load_store_lod{}; bool amd_gcn_shader{}; bool amd_shader_trinary_minmax{}; + bool nv_framebuffer_mixed_samples{}; + bool amd_mixed_attachment_samples{}; bool shader_atomic_float2{}; bool workgroup_memory_explicit_layout{}; bool portability_subset{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 994184cf1..f81f3283c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,6 +325,8 @@ bool PipelineCache::RefreshGraphicsKey() { const auto& regs = liverpool->regs; auto& key = graphics_key; + const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid(); + key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value() : Liverpool::DepthBuffer::ZFormat::Invalid; key.stencil_format = regs.depth_buffer.StencilValid() @@ -339,17 +341,17 @@ bool PipelineCache::RefreshGraphicsKey() { key.patch_control_points = regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0; key.logic_op = regs.color_control.rop3; - key.num_samples = regs.NumSamples(); + key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1; + key.num_samples = key.depth_samples; key.cb_shader_mask = regs.color_shader_mask; const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - // First pass to fill render target information + // First pass to fill render target information needed by shader recompiler for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) { const auto& col_buf = regs.color_buffers[cb]; - const u32 target_mask = regs.color_target_mask.GetMask(cb); - if (!col_buf || !target_mask) { + if (!col_buf || !regs.color_target_mask.GetMask(cb)) { // No attachment bound or writing to it is disabled. continue; } @@ -362,6 +364,26 @@ bool PipelineCache::RefreshGraphicsKey() { .export_format = regs.color_export_format.GetFormat(cb), .swizzle = col_buf.Swizzle(), }; + } + + // Compile and bind shader stages + if (!RefreshGraphicsStages()) { + return false; + } + + // Second pass to mask out render targets not written by shader and fill remaining info + u8 color_samples = 0; + bool all_color_samples_same = true; + for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) { + const auto& col_buf = regs.color_buffers[cb]; + const u32 target_mask = regs.color_target_mask.GetMask(cb); + if (!col_buf || !target_mask) { + continue; + } + if ((key.mrt_mask & (1u << cb)) == 0) { + key.color_buffers[cb] = {}; + continue; + } // Fill color blending information if (regs.blend_control[cb].enable && !col_buf.info.blend_bypass) { @@ -371,22 +393,21 @@ bool PipelineCache::RefreshGraphicsKey() { // Apply swizzle to target mask key.write_masks[cb] = vk::ColorComponentFlags{key.color_buffers[cb].swizzle.ApplyMask(target_mask)}; + + // Fill color samples + const u8 prev_color_samples = std::exchange(color_samples, col_buf.NumSamples()); + all_color_samples_same &= color_samples == prev_color_samples || prev_color_samples == 0; + key.color_samples[cb] = color_samples; + key.num_samples = std::max(key.num_samples, color_samples); } - // Compile and bind shader stages - if (!RefreshGraphicsStages()) { - return false; - } - - // Second pass to mask out render targets not written by fragment shader - for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) { - const auto& col_buf = regs.color_buffers[cb]; - if (!col_buf || !regs.color_target_mask.GetMask(cb)) { - continue; - } - if ((key.mrt_mask & (1u << cb)) == 0) { - // Attachment is bound and mask allows writes but shader does not output to it. - key.color_buffers[cb] = {}; + // Force all color samples to match depth samples to avoid unsupported MSAA configuration + if (color_samples != 0) { + const bool depth_mismatch = db_enabled && color_samples != key.depth_samples; + if (!all_color_samples_same && !instance.IsMixedAnySamplesSupported() || + all_color_samples_same && depth_mismatch && !instance.IsMixedDepthSamplesSupported()) { + key.color_samples.fill(key.depth_samples); + key.num_samples = key.depth_samples; } } diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 829273aa5..3605a3542 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -3,26 +3,21 @@ #include "common/config.h" #include "common/debug.h" +#include "common/elf_info.h" #include "common/singleton.h" #include "core/debug_state.h" #include "core/devtools/layer.h" #include "core/libraries/system/systemservice.h" #include "imgui/renderer/imgui_core.h" +#include "imgui/renderer/imgui_impl_vulkan.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_presenter.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image.h" -#include "video_core/host_shaders/fs_tri_vert.h" - -#include - #include - -#include "common/elf_info.h" -#include "imgui/renderer/imgui_impl_vulkan.h" +#include namespace Vulkan { @@ -291,26 +286,14 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form return {}; } -Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, - const Libraries::VideoOut::PixelFormat format, bool is_eop) { - // Request a free presentation frame. +Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, + VAddr cpu_address) { + auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + const auto image_id = texture_cache.FindImage(desc); + texture_cache.UpdateImage(image_id); + Frame* frame = GetRenderFrame(); - // EOP flips are triggered from GPU thread so use the drawing scheduler to record - // commands. Otherwise we are dealing with a CPU flip which could have arrived - // from any guest thread. Use a separate scheduler for that. - auto& scheduler = is_eop ? draw_scheduler : flip_scheduler; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - - bool vk_host_markers_enabled = Config::getVkHostMarkersEnabled(); - if (vk_host_markers_enabled) { - const auto label = fmt::format("PrepareFrameInternal:{}", image_id.index); - cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = label.c_str(), - }); - } - const auto frame_subresources = vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0, @@ -319,111 +302,116 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, .layerCount = VK_REMAINING_ARRAY_LAYERS, }; - const auto pre_barrier = - vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, - .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead, - .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, - .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eColorAttachmentOptimal, - .image = frame->image, - .subresourceRange{frame_subresources}}; + const auto pre_barrier = vk::ImageMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead, + .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eColorAttachmentOptimal, + .image = frame->image, + .subresourceRange{frame_subresources}, + }; + + draw_scheduler.EndRendering(); + const auto cmdbuf = draw_scheduler.CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &pre_barrier, + }); + + VideoCore::ImageViewInfo view_info{}; + view_info.format = GetFrameViewFormat(attribute.attrib.pixel_format); + // Exclude alpha from output frame to avoid blending with UI. + view_info.mapping.a = vk::ComponentSwizzle::eOne; + + auto& image = texture_cache.GetImage(image_id); + auto image_view = *image.FindView(view_info).image_view; + image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {}); + + const vk::Extent2D image_size = {image.info.size.width, image.info.size.height}; + expected_ratio = static_cast(image_size.width) / static_cast(image_size.height); + + image_view = fsr_pass.Render(cmdbuf, image_view, image_size, {frame->width, frame->height}, + fsr_settings, frame->is_hdr); + pp_pass.Render(cmdbuf, image_view, image_size, *frame, pp_settings); + + DebugState.game_resolution = {image_size.width, image_size.height}; + DebugState.output_resolution = {frame->width, frame->height}; + + // Flush frame creation commands. + frame->ready_semaphore = draw_scheduler.GetMasterSemaphore()->Handle(); + frame->ready_tick = draw_scheduler.CurrentTick(); + SubmitInfo info{}; + draw_scheduler.Flush(info); + return frame; +} + +Frame* Presenter::PrepareBlankFrame(bool present_thread) { + // Request a free presentation frame. + Frame* frame = GetRenderFrame(); + + auto& scheduler = present_thread ? present_scheduler : draw_scheduler; + scheduler.EndRendering(); + + const auto cmdbuf = scheduler.CommandBuffer(); + + constexpr vk::ImageSubresourceRange simple_subresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }; + const auto pre_barrier = vk::ImageMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead, + .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eColorAttachmentOptimal, + .image = frame->image, + .subresourceRange = simple_subresource, + }; + + const auto post_barrier = vk::ImageMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .image = frame->image, + .subresourceRange = simple_subresource, + }; + + const vk::RenderingAttachmentInfo attachment = { + .imageView = frame->image_view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = vk::AttachmentLoadOp::eClear, + .storeOp = vk::AttachmentStoreOp::eStore, + }; + const vk::RenderingInfo rendering_info = { + .renderArea = + { + .extent = {frame->width, frame->height}, + }, + .layerCount = 1, + .colorAttachmentCount = 1u, + .pColorAttachments = &attachment, + }; cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .imageMemoryBarrierCount = 1, .pImageMemoryBarriers = &pre_barrier, }); - if (image_id != VideoCore::NULL_IMAGE_ID) { - auto& image = texture_cache.GetImage(image_id); - vk::Extent2D image_size = {image.info.size.width, image.info.size.height}; - float ratio = (float)image_size.width / (float)image_size.height; - if (ratio != expected_ratio) { - expected_ratio = ratio; - } - - image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {}, - cmdbuf); - - VideoCore::ImageViewInfo info{}; - info.format = GetFrameViewFormat(format); - // Exclude alpha from output frame to avoid blending with UI. - info.mapping = vk::ComponentMapping{ - .r = vk::ComponentSwizzle::eIdentity, - .g = vk::ComponentSwizzle::eIdentity, - .b = vk::ComponentSwizzle::eIdentity, - .a = vk::ComponentSwizzle::eOne, - }; - vk::ImageView imageView; - if (auto view = image.FindView(info)) { - imageView = *texture_cache.GetImageView(view).image_view; - } else { - imageView = *texture_cache.RegisterImageView(image_id, info).image_view; - } - - if (vk_host_markers_enabled) { - cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = "Host/FSR", - }); - } - - imageView = fsr_pass.Render(cmdbuf, imageView, image_size, {frame->width, frame->height}, - fsr_settings, frame->is_hdr); - - if (vk_host_markers_enabled) { - cmdbuf.endDebugUtilsLabelEXT(); - cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = "Host/Post processing", - }); - } - pp_pass.Render(cmdbuf, imageView, image_size, *frame, pp_settings); - if (vk_host_markers_enabled) { - cmdbuf.endDebugUtilsLabelEXT(); - } - - DebugState.game_resolution = {image_size.width, image_size.height}; - DebugState.output_resolution = {frame->width, frame->height}; - } else { - // Fix display of garbage images on startup on some drivers - const std::array attachments = {{ - { - .imageView = frame->image_view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - }, - }}; - const vk::RenderingInfo rendering_info{ - .renderArea{ - .extent{frame->width, frame->height}, - }, - .layerCount = 1, - .colorAttachmentCount = attachments.size(), - .pColorAttachments = attachments.data(), - }; - cmdbuf.beginRendering(rendering_info); - cmdbuf.endRendering(); - } - - const auto post_barrier = - vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, - .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, - .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, - .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, - .newLayout = vk::ImageLayout::eGeneral, - .image = frame->image, - .subresourceRange{frame_subresources}}; + cmdbuf.beginRendering(rendering_info); + cmdbuf.endRendering(); cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .imageMemoryBarrierCount = 1, .pImageMemoryBarriers = &post_barrier, }); - if (vk_host_markers_enabled) { - cmdbuf.endDebugUtilsLabelEXT(); - } - // Flush frame creation commands. frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle(); frame->ready_tick = scheduler.CurrentTick(); diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index ea933b21c..b0913333d 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -66,44 +66,6 @@ public: return window; } - Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, - VAddr cpu_address, bool is_eop) { - auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(desc); - texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); - return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop); - } - - Frame* PrepareBlankFrame(bool is_eop) { - return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, - Libraries::VideoOut::PixelFormat::Unknown, is_eop); - } - - VideoCore::Image& RegisterVideoOutSurface( - const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { - vo_buffers_addr.emplace_back(cpu_address); - auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(desc); - auto& image = texture_cache.GetImage(image_id); - image.usage.vo_surface = 1u; - return image; - } - - bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) { - return vo_buffer == color_buffer.Address(); - }) != vo_buffers_addr.cend(); - } - - void Present(Frame* frame, bool is_reusing_frame = false); - void RecreateFrame(Frame* frame, u32 width, u32 height); - Frame* PrepareLastFrame(); - - void FlushDraw() { - SubmitInfo info{}; - draw_scheduler.Flush(info); - } - Rasterizer& GetRasterizer() const { return *rasterizer.get(); } @@ -120,11 +82,33 @@ public: pp_settings.hdr = enable ? 1 : 0; } + bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const { + return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend(); + } + + VideoCore::Image& RegisterVideoOutSurface( + const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { + vo_buffers_addr.emplace_back(cpu_address); + auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + const auto image_id = texture_cache.FindImage(desc); + auto& image = texture_cache.GetImage(image_id); + image.usage.vo_surface = 1u; + return image; + } + + Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, + VAddr cpu_address); + + Frame* PrepareBlankFrame(bool present_thread); + + void Present(Frame* frame, bool is_reusing_frame = false); + Frame* PrepareLastFrame(); + private: - Frame* PrepareFrameInternal(VideoCore::ImageId image_id, - Libraries::VideoOut::PixelFormat format, bool is_eop = true); Frame* GetRenderFrame(); + void RecreateFrame(Frame* frame, u32 width, u32 height); + void SetExpectedGameSize(s32 width, s32 height); private: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 771a16f2e..33bec3083 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -107,128 +107,44 @@ bool Rasterizer::FilterDraw() { return true; } -RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { - // Prefetch color and depth buffers to let texture cache handle possible overlaps with bound - // textures (e.g. mipgen) - RenderState state; - state.width = instance.GetMaxFramebufferWidth(); - state.height = instance.GetMaxFramebufferHeight(); - state.num_layers = std::numeric_limits::max(); - state.num_color_attachments = std::bit_width(mrt_mask); - - cb_descs.clear(); - db_desc.reset(); - +void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) { + // Prefetch render targets to handle overlaps with bound textures (e.g. mipgen) + const auto& key = pipeline->GetGraphicsKey(); const auto& regs = liverpool->regs; - if (regs.color_control.degamma_enable) { LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); } const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - - for (s32 cb = 0; cb < state.num_color_attachments && !skip_cb_binding; ++cb) { + for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) { + auto& [image_id, desc] = cb_descs[cb]; const auto& col_buf = regs.color_buffers[cb]; - if (!col_buf) { - state.color_attachments[cb].imageView = VK_NULL_HANDLE; + const u32 target_mask = regs.color_target_mask.GetMask(cb); + if (skip_cb_binding || !col_buf || !target_mask || (key.mrt_mask & (1 << cb)) == 0) { + image_id = {}; continue; } - - // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform - // an unnecessary transition and may result in state conflict if the resource is already - // bound for reading. - if ((mrt_mask & (1 << cb)) == 0) { - state.color_attachments[cb].imageView = VK_NULL_HANDLE; - continue; - } - - // If the color buffer is still bound but rendering to it is disabled by the target - // mask, we need to prevent the render area from being affected by unbound render target - // extents. - if (!regs.color_target_mask.GetMask(cb)) { - state.color_attachments[cb].imageView = VK_NULL_HANDLE; - continue; - } - const auto& hint = liverpool->last_cb_extent[cb]; - auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{}, - std::tuple{col_buf, hint}); - const auto& image_view = texture_cache.FindRenderTarget(desc); - image_id = bound_images.emplace_back(image_view.image_id); + std::construct_at(&desc, col_buf, hint); + image_id = bound_images.emplace_back(texture_cache.FindImage(desc)); auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; - - const auto slice = image_view.info.range.base.layer; - const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice); - texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); - - const auto mip = image_view.info.range.base.level; - state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); - state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); - state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); - state.color_attachments[cb] = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, - }; } if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) || (regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); const auto& hint = liverpool->last_db_extent; - auto& [image_id, desc] = - db_desc.emplace(std::piecewise_construct, std::tuple{}, - std::tuple{regs.depth_buffer, regs.depth_view, regs.depth_control, - htile_address, hint}); - const auto& image_view = texture_cache.FindDepthTarget(desc); - image_id = bound_images.emplace_back(image_view.image_id); + auto& [image_id, desc] = db_desc; + std::construct_at(&desc, regs.depth_buffer, regs.depth_view, regs.depth_control, + htile_address, hint); + image_id = bound_images.emplace_back(texture_cache.FindImage(desc)); auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; - - const auto slice = image_view.info.range.base.layer; - const bool is_depth_clear = regs.depth_render_control.depth_clear_enable || - texture_cache.IsMetaCleared(htile_address, slice); - const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable; - ASSERT(desc.view_info.range.extent.levels == 1); - - state.width = std::min(state.width, image.info.size.width); - state.height = std::min(state.height, image.info.size.height); - state.has_depth = regs.depth_buffer.DepthValid(); - state.has_stencil = regs.depth_buffer.StencilValid(); - state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); - if (state.has_depth) { - state.depth_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = - is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, - }; - } - if (state.has_stencil) { - state.stencil_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = - is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, - }; - } - texture_cache.TouchMeta(htile_address, slice, false); + } else { + db_desc.first = {}; } - - if (state.num_layers == std::numeric_limits::max()) { - state.num_layers = 1; - } - - return state; } [[nodiscard]] std::pair GetDrawOffsets( @@ -253,28 +169,20 @@ void Rasterizer::EliminateFastClear() { return; } VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]); - const auto& image_view = texture_cache.FindRenderTarget(desc); + const auto image_id = texture_cache.FindImage(desc); + const auto& image_view = texture_cache.FindRenderTarget(image_id, desc); if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) { return; } for (u32 slice = col_buf.view.slice_start; slice <= col_buf.view.slice_max; ++slice) { texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); } - auto& image = texture_cache.GetImage(image_view.image_id); - const vk::ImageSubresourceRange range = { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = col_buf.view.slice_start, - .layerCount = col_buf.view.slice_max - col_buf.view.slice_start + 1, - }; - scheduler.EndRendering(); + auto& image = texture_cache.GetImage(image_id); + const auto clear_value = LiverpoolToVK::ColorBufferClearValue(col_buf); + ScopeMarkerBegin(fmt::format("EliminateFastClear:MRT={:#x}:M={:#x}", col_buf.Address(), col_buf.CmaskAddress())); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); - scheduler.CommandBuffer().clearColorImage(image.image, image.last_state.layout, - LiverpoolToVK::ColorBufferClearValue(col_buf).color, - range); + image.Clear(clear_value, desc.view_info.range); ScopeMarkerEnd(); } @@ -293,18 +201,20 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - auto state = PrepareRenderState(pipeline->GetMrtMask()); + PrepareRenderState(pipeline); if (!BindResources(pipeline)) { return; } + const auto state = BeginRendering(pipeline); buffer_cache.BindVertexBuffers(*pipeline); if (is_indexed) { buffer_cache.BindIndexBuffer(index_offset); } - BeginRendering(*pipeline, state); - UpdateDynamicState(*pipeline, is_indexed); + pipeline->BindResources(set_writes, buffer_barriers, push_data); + UpdateDynamicState(pipeline, is_indexed); + scheduler.BeginRendering(state); const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); @@ -339,10 +249,11 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - auto state = PrepareRenderState(pipeline->GetMrtMask()); + PrepareRenderState(pipeline); if (!BindResources(pipeline)) { return; } + const auto state = BeginRendering(pipeline); buffer_cache.BindVertexBuffers(*pipeline); if (is_indexed) { @@ -358,8 +269,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false); } - BeginRendering(*pipeline, state); - UpdateDynamicState(*pipeline, is_indexed); + pipeline->BindResources(set_writes, buffer_barriers, push_data); + UpdateDynamicState(pipeline, is_indexed); + scheduler.BeginRendering(state); // We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and // instance offsets will be automatically applied by Vulkan from indirect args buffer. @@ -411,6 +323,7 @@ void Rasterizer::DispatchDirect() { } scheduler.EndRendering(); + pipeline->BindResources(set_writes, buffer_barriers, push_data); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); @@ -434,10 +347,11 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { return; } - scheduler.EndRendering(); - const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); + scheduler.EndRendering(); + pipeline->BindResources(set_writes, buffer_barriers, push_data); + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatchIndirect(buffer->Handle(), base); @@ -480,7 +394,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { // Bind resource buffers and textures. Shader::Backend::Bindings binding{}; - Shader::PushData push_data = MakeUserData(liverpool->regs); + push_data = MakeUserData(liverpool->regs); for (const auto* stage : pipeline->GetStages()) { if (!stage) { continue; @@ -505,8 +419,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { fault_process_pending |= uses_dma; - pipeline->BindResources(set_writes, buffer_barriers, push_data); - return true; } @@ -740,7 +652,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin } else { if (auto& old_image = texture_cache.GetImage(image_id); old_image.binding.needs_rebind) { - old_image.binding.Reset(); // clean up previous image binding state + old_image.binding = {}; image_id = texture_cache.FindImage(desc); } @@ -781,7 +693,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin image.usage.texture |= !is_storage; image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, - image.last_state.layout); + image.backing->state.layout); } set_writes.push_back({ @@ -816,55 +728,78 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin } } -void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) { - int cb_index = 0; +RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) { attachment_feedback_loop = false; - for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) { - if (state.color_attachments[attach_idx].imageView == VK_NULL_HANDLE) { + const auto& regs = liverpool->regs; + const auto& key = pipeline->GetGraphicsKey(); + RenderState state; + state.width = instance.GetMaxFramebufferWidth(); + state.height = instance.GetMaxFramebufferHeight(); + state.num_layers = std::numeric_limits::max(); + state.num_color_attachments = std::bit_width(key.mrt_mask); + for (auto cb = 0u; cb < state.num_color_attachments; ++cb) { + auto& [image_id, desc] = cb_descs[cb]; + if (!image_id) { continue; } - - auto& [image_id, desc] = cb_descs[cb_index++]; - if (auto& old_img = texture_cache.GetImage(image_id); old_img.binding.needs_rebind) { - auto& view = texture_cache.FindRenderTarget(desc); - ASSERT(view.image_id != image_id); - image_id = bound_images.emplace_back(view.image_id); - auto& image = texture_cache.GetImage(view.image_id); - state.color_attachments[attach_idx].imageView = *view.image_view; - state.color_attachments[attach_idx].imageLayout = image.last_state.layout; - - const auto mip = view.info.range.base.level; - state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); - state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); + auto* image = &texture_cache.GetImage(image_id); + if (image->binding.needs_rebind) { + image_id = bound_images.emplace_back(texture_cache.FindImage(desc)); + image = &texture_cache.GetImage(image_id); } - auto& image = texture_cache.GetImage(image_id); - if (image.binding.is_bound) { - ASSERT_MSG(!image.binding.force_general, + texture_cache.UpdateImage(image_id); + image->SetBackingSamples(key.color_samples[cb]); + const auto& image_view = texture_cache.FindRenderTarget(image_id, desc); + const auto slice = image_view.info.range.base.layer; + const auto mip = image_view.info.range.base.level; + + const auto& col_buf = regs.color_buffers[cb]; + const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice); + texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); + + if (image->binding.is_bound) { + ASSERT_MSG(!image->binding.force_general, "Having image both as storage and render target is unsupported"); - image.Transit(instance.IsAttachmentFeedbackLoopLayoutSupported() - ? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT - : vk::ImageLayout::eGeneral, - vk::AccessFlagBits2::eColorAttachmentWrite, {}); + image->Transit(instance.IsAttachmentFeedbackLoopLayoutSupported() + ? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT + : vk::ImageLayout::eGeneral, + vk::AccessFlagBits2::eColorAttachmentWrite, {}); attachment_feedback_loop = true; } else { - image.Transit(vk::ImageLayout::eColorAttachmentOptimal, - vk::AccessFlagBits2::eColorAttachmentWrite | - vk::AccessFlagBits2::eColorAttachmentRead, - desc.view_info.range); + image->Transit(vk::ImageLayout::eColorAttachmentOptimal, + vk::AccessFlagBits2::eColorAttachmentWrite | + vk::AccessFlagBits2::eColorAttachmentRead, + desc.view_info.range); } - image.usage.render_target = 1u; - state.color_attachments[attach_idx].imageLayout = image.last_state.layout; + + state.width = std::min(state.width, std::max(image->info.size.width >> mip, 1u)); + state.height = std::min(state.height, std::max(image->info.size.height >> mip, 1u)); + state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); + state.color_attachments[cb] = { + .imageView = *image_view.image_view, + .imageLayout = image->backing->state.layout, + .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = + is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, + }; + image->usage.render_target = 1u; } - if (db_desc) { - const auto& image_id = std::get<0>(*db_desc); - const auto& desc = std::get<1>(*db_desc); + if (auto image_id = db_desc.first; image_id) { + auto& desc = db_desc.second; + const auto htile_address = regs.depth_htile_data_base.GetAddress(); + const auto& image_view = texture_cache.FindDepthTarget(image_id, desc); auto& image = texture_cache.GetImage(image_id); - ASSERT(image.binding.needs_rebind == 0); - const bool has_stencil = image.usage.stencil; - if (has_stencil) { - image.aspect_mask |= vk::ImageAspectFlagBits::eStencil; - } + + const auto slice = image_view.info.range.base.layer; + const bool is_depth_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address, slice); + const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable; + texture_cache.TouchMeta(htile_address, slice, false); + ASSERT(desc.view_info.range.extent.levels == 1 && !image.binding.needs_rebind); + + const bool has_stencil = image.info.props.has_stencil; const auto new_layout = desc.view_info.is_storage ? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal : vk::ImageLayout::eDepthAttachmentOptimal @@ -874,13 +809,41 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s vk::AccessFlagBits2::eDepthStencilAttachmentWrite | vk::AccessFlagBits2::eDepthStencilAttachmentRead, desc.view_info.range); - state.depth_attachment.imageLayout = image.last_state.layout; - state.stencil_attachment.imageLayout = image.last_state.layout; + + state.width = std::min(state.width, image.info.size.width); + state.height = std::min(state.height, image.info.size.height); + state.has_depth = regs.depth_buffer.DepthValid(); + state.has_stencil = regs.depth_buffer.StencilValid(); + state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); + if (state.has_depth) { + state.depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = image.backing->state.layout, + .loadOp = + is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, + }; + } + if (state.has_stencil) { + state.stencil_attachment = { + .imageView = *image_view.image_view, + .imageLayout = image.backing->state.layout, + .loadOp = + is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, + }; + } + image.usage.depth_target = true; - image.usage.stencil = has_stencil; } - scheduler.BeginRendering(state); + if (state.num_layers == std::numeric_limits::max()) { + state.num_layers = 1; + } + + return state; } void Rasterizer::Resolve() { @@ -904,66 +867,7 @@ void Rasterizer::Resolve() { ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}", liverpool->regs.color_buffers[0].Address(), liverpool->regs.color_buffers[1].Address())); - - mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, - mrt0_range); - mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, - mrt1_range); - - if (mrt0_image.info.num_samples == 1) { - // Vulkan does not allow resolve from a single sample image, so change it to a copy. - // Note that resolving a single-sampled image doesn't really make sense, but a game might do - // it. - vk::ImageCopy region = { - .srcSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt0_range.base.layer, - .layerCount = mrt0_range.extent.layers, - }, - .srcOffset = {0, 0, 0}, - .dstSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt1_range.base.layer, - .layerCount = mrt1_range.extent.layers, - }, - .dstOffset = {0, 0, 0}, - .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, - }; - scheduler.CommandBuffer().copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, - mrt1_image.image, vk::ImageLayout::eTransferDstOptimal, - region); - } else { - vk::ImageResolve region = { - .srcSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt0_range.base.layer, - .layerCount = mrt0_range.extent.layers, - }, - .srcOffset = {0, 0, 0}, - .dstSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt1_range.base.layer, - .layerCount = mrt1_range.extent.layers, - }, - .dstOffset = {0, 0, 0}, - .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, - }; - scheduler.CommandBuffer().resolveImage( - mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image, - vk::ImageLayout::eTransferDstOptimal, region); - } - - mrt1_image.flags |= VideoCore::ImageFlagBits::GpuModified; - mrt1_image.flags &= ~VideoCore::ImageFlagBits::Dirty; - + mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range); ScopeMarkerEnd(); } @@ -1020,9 +924,9 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { .dstOffset = {0, 0, 0}, .extent = {write_image.info.size.width, write_image.info.size.height, 1}, }; - scheduler.CommandBuffer().copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, - write_image.image, vk::ImageLayout::eTransferDstOptimal, - region); + scheduler.CommandBuffer().copyImage(read_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, + write_image.GetImage(), + vk::ImageLayout::eTransferDstOptimal, region); ScopeMarkerEnd(); } @@ -1090,18 +994,14 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) { } } -void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline, const bool is_indexed) const { +void Rasterizer::UpdateDynamicState(const GraphicsPipeline* pipeline, const bool is_indexed) const { UpdateViewportScissorState(); UpdateDepthStencilState(); UpdatePrimitiveState(is_indexed); UpdateRasterizationState(); + UpdateColorBlendingState(pipeline); auto& dynamic_state = scheduler.GetDynamicState(); - dynamic_state.SetBlendConstants(liverpool->regs.blend_constants); - dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks()); - dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop); - - // Commit new dynamic state to the command buffer. dynamic_state.Commit(instance, scheduler.CommandBuffer()); } @@ -1320,6 +1220,14 @@ void Rasterizer::UpdateRasterizationState() const { dynamic_state.SetLineWidth(regs.line_control.Width()); } +void Rasterizer::UpdateColorBlendingState(const GraphicsPipeline* pipeline) const { + const auto& regs = liverpool->regs; + auto& dynamic_state = scheduler.GetDynamicState(); + dynamic_state.SetBlendConstants(regs.blend_constants); + dynamic_state.SetColorWriteMasks(pipeline->GetGraphicsKey().write_masks); + dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop); +} + void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) { if ((from_guest && !Config::getVkGuestMarkersEnabled()) || (!from_guest && !Config::getVkHostMarkersEnabled())) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b32cfa424..9ba8bfdd3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "common/recursive_lock.h" #include "common/shared_first_mutex.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -84,29 +83,29 @@ public: } private: - RenderState PrepareRenderState(u32 mrt_mask); - void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state); + void PrepareRenderState(const GraphicsPipeline* pipeline); + RenderState BeginRendering(const GraphicsPipeline* pipeline); void Resolve(); void DepthStencilCopy(bool is_depth, bool is_stencil); void EliminateFastClear(); - void UpdateDynamicState(const GraphicsPipeline& pipeline, bool is_indexed) const; + void UpdateDynamicState(const GraphicsPipeline* pipeline, bool is_indexed) const; void UpdateViewportScissorState() const; void UpdateDepthStencilState() const; void UpdatePrimitiveState(bool is_indexed) const; void UpdateRasterizationState() const; + void UpdateColorBlendingState(const GraphicsPipeline* pipeline) const; bool FilterDraw(); void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding, Shader::PushData& push_data); - void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding); - bool BindResources(const Pipeline* pipeline); + void ResetBindings() { for (auto& image_id : bound_images) { - texture_cache.GetImage(image_id).binding.Reset(); + texture_cache.GetImage(image_id).binding = {}; } bound_images.clear(); } @@ -128,16 +127,17 @@ private: Common::SharedFirstMutex mapped_ranges_mutex; PipelineCache pipeline_cache; - boost::container::static_vector< - std::pair, 8> - cb_descs; - std::optional> db_desc; + using RenderTargetInfo = + std::pair; + std::array cb_descs; + std::pair db_desc; boost::container::static_vector image_infos; boost::container::static_vector buffer_infos; boost::container::static_vector bound_images; Pipeline::DescriptorWrites set_writes; Pipeline::BufferBarriers buffer_barriers; + Shader::PushData push_data; using BufferBindingInfo = std::tuple; boost::container::static_vector buffer_bindings; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 5af8e2f1f..5bd8025aa 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -4,7 +4,6 @@ #include #include #include "common/assert.h" -#include "common/scope_exit.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" diff --git a/src/video_core/texture_cache/blit_helper.cpp b/src/video_core/texture_cache/blit_helper.cpp index 4f1d17547..f1b79e925 100644 --- a/src/video_core/texture_cache/blit_helper.cpp +++ b/src/video_core/texture_cache/blit_helper.cpp @@ -9,6 +9,7 @@ #include "video_core/host_shaders/color_to_ms_depth_frag.h" #include "video_core/host_shaders/fs_tri_vert.h" +#include "video_core/host_shaders/ms_image_blit_frag.h" namespace VideoCore { @@ -35,19 +36,23 @@ BlitHelper::BlitHelper(const Vulkan::Instance& instance_, Vulkan::Scheduler& sch CreatePipelineLayouts(); } -BlitHelper::~BlitHelper() = default; - -void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) { - source.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {}); - dest.Transit(vk::ImageLayout::eDepthAttachmentOptimal, - vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {}); +BlitHelper::~BlitHelper() { + const auto device = instance.GetDevice(); + device.destroy(fs_tri_vertex); + device.destroy(color_to_ms_depth_frag); + device.destroy(src_msaa_copy_frag); + device.destroy(src_non_msaa_copy_frag); +} +void BlitHelper::ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples, + vk::Format src_pixel_format, vk::Format dst_pixel_format, + vk::Image source, vk::Image dest) { const vk::ImageViewUsageCreateInfo color_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled}; const vk::ImageViewCreateInfo color_view_ci = { .pNext = &color_usage_ci, - .image = source.image, + .image = source, .viewType = vk::ImageViewType::e2D, - .format = source.info.pixel_format, + .format = src_pixel_format, .subresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0U, @@ -64,9 +69,9 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) { .usage = vk::ImageUsageFlagBits::eDepthStencilAttachment}; const vk::ImageViewCreateInfo depth_view_ci = { .pNext = &depth_usage_ci, - .image = dest.image, + .image = dest, .viewType = vk::ImageViewType::e2D, - .format = dest.info.pixel_format, + .format = dst_pixel_format, .subresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eDepth, .baseMipLevel = 0U, @@ -86,8 +91,8 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) { Vulkan::RenderState state{}; state.has_depth = true; - state.width = dest.info.size.width; - state.height = dest.info.size.height; + state.width = width; + state.height = height; state.depth_attachment = vk::RenderingAttachmentInfo{ .imageView = depth_view, .imageLayout = vk::ImageLayout::eDepthAttachmentOptimal, @@ -114,9 +119,13 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) { cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U, texture_write); - const DepthPipelineKey key{dest.info.num_samples, dest.info.pixel_format}; - const vk::Pipeline depth_pipeline = GetDepthToMsPipeline(key); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_pipeline); + const MsPipelineKey key{num_samples, dst_pixel_format, false}; + auto it = std::ranges::find(color_to_ms_depth_pl, key, &MsPipeline::first); + if (it == color_to_ms_depth_pl.end()) { + CreateColorToMSDepthPipeline(key); + it = --color_to_ms_depth_pl.end(); + } + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second); const vk::Viewport viewport = { .x = 0, @@ -136,24 +145,122 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) { cmdbuf.draw(3, 1, 0, 0); + scheduler.EndRendering(); scheduler.GetDynamicState().Invalidate(); } -vk::Pipeline BlitHelper::GetDepthToMsPipeline(const DepthPipelineKey& key) { - auto it = std::ranges::find(color_to_ms_depth_pl, key, &DepthPipeline::first); - if (it != color_to_ms_depth_pl.end()) { - return *it->second; +void BlitHelper::CopyBetweenMsImages(u32 width, u32 height, u32 num_samples, + vk::Format pixel_format, bool src_msaa, vk::Image source, + vk::Image dest) { + const vk::ImageViewUsageCreateInfo src_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled}; + const vk::ImageViewCreateInfo src_view_ci = { + .pNext = &src_usage_ci, + .image = source, + .viewType = vk::ImageViewType::e2D, + .format = pixel_format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0U, + .levelCount = 1U, + .baseArrayLayer = 0U, + .layerCount = 1U, + }, + }; + const auto [src_view_result, src_view] = instance.GetDevice().createImageView(src_view_ci); + ASSERT_MSG(src_view_result == vk::Result::eSuccess, "Failed to create image view: {}", + vk::to_string(src_view_result)); + + const vk::ImageViewUsageCreateInfo dst_usage_ci{.usage = + vk::ImageUsageFlagBits::eColorAttachment}; + const vk::ImageViewCreateInfo dst_view_ci = { + .pNext = &dst_usage_ci, + .image = dest, + .viewType = vk::ImageViewType::e2D, + .format = pixel_format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0U, + .levelCount = 1U, + .baseArrayLayer = 0U, + .layerCount = 1U, + }, + }; + const auto [dst_view_result, dst_view] = instance.GetDevice().createImageView(dst_view_ci); + ASSERT_MSG(dst_view_result == vk::Result::eSuccess, "Failed to create image view: {}", + vk::to_string(dst_view_result)); + scheduler.DeferOperation([device = instance.GetDevice(), src_view, dst_view] { + device.destroyImageView(src_view); + device.destroyImageView(dst_view); + }); + + Vulkan::RenderState state{}; + state.width = width; + state.height = height; + state.color_attachments[state.num_color_attachments++] = vk::RenderingAttachmentInfo{ + .imageView = dst_view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = vk::AttachmentLoadOp::eDontCare, + .storeOp = vk::AttachmentStoreOp::eStore, + }; + scheduler.BeginRendering(state); + + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::DescriptorImageInfo image_info = { + .sampler = VK_NULL_HANDLE, + .imageView = src_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + }; + const vk::WriteDescriptorSet texture_write = { + .dstSet = VK_NULL_HANDLE, + .dstBinding = 0U, + .dstArrayElement = 0U, + .descriptorCount = 1U, + .descriptorType = vk::DescriptorType::eSampledImage, + .pImageInfo = &image_info, + }; + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U, + texture_write); + + const MsPipelineKey key{num_samples, pixel_format, src_msaa}; + auto it = std::ranges::find(ms_image_copy_pl, key, &MsPipeline::first); + if (it == ms_image_copy_pl.end()) { + CreateMsCopyPipeline(key); + it = --ms_image_copy_pl.end(); } - CreateColorToMSDepthPipeline(key); - return *color_to_ms_depth_pl.back().second; + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second); + + const vk::Viewport viewport = { + .x = 0, + .y = 0, + .width = float(state.width), + .height = float(state.height), + .minDepth = 0.f, + .maxDepth = 1.f, + }; + cmdbuf.setViewportWithCount(viewport); + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {state.width, state.height}, + }; + cmdbuf.setScissorWithCount(scissor); + + cmdbuf.draw(3, 1, 0, 0); + + scheduler.EndRendering(); + scheduler.GetDynamicState().Invalidate(); } void BlitHelper::CreateShaders() { - fs_tri_vertex = Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex, - instance.GetDevice()); - color_to_ms_depth_frag = - Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG, vk::ShaderStageFlagBits::eFragment, - instance.GetDevice()); + const auto device = instance.GetDevice(); + fs_tri_vertex = + Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex, device); + color_to_ms_depth_frag = Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG, + vk::ShaderStageFlagBits::eFragment, device); + src_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG, + vk::ShaderStageFlagBits::eFragment, device, {"SRC_MSAA"}); + src_non_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG, + vk::ShaderStageFlagBits::eFragment, device); } void BlitHelper::CreatePipelineLayouts() { @@ -186,7 +293,7 @@ void BlitHelper::CreatePipelineLayouts() { single_texture_pl_layout = std::move(pipeline_layout); } -void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) { +void BlitHelper::CreateColorToMSDepthPipeline(const MsPipelineKey& key) { const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = vk::PrimitiveTopology::eTriangleList, }; @@ -220,7 +327,7 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) { const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = { .colorAttachmentCount = 0U, .pColorAttachmentFormats = nullptr, - .depthAttachmentFormat = key.depth_format, + .depthAttachmentFormat = key.attachment_format, .stencilAttachmentFormat = vk::Format::eUndefined, }; @@ -253,4 +360,83 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) { color_to_ms_depth_pl.emplace_back(key, std::move(pipeline)); } +void BlitHelper::CreateMsCopyPipeline(const MsPipelineKey& key) { + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleList, + }; + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = ToSampleCount(key.num_samples), + }; + const vk::PipelineDepthStencilStateCreateInfo depth_state = { + .depthTestEnable = false, + .depthWriteEnable = false, + .depthCompareOp = vk::CompareOp::eAlways, + }; + const std::array dynamic_states = {vk::DynamicState::eViewportWithCount, + vk::DynamicState::eScissorWithCount}; + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + std::array shader_stages; + shader_stages[0] = { + .stage = vk::ShaderStageFlagBits::eVertex, + .module = fs_tri_vertex, + .pName = "main", + }; + shader_stages[1] = { + .stage = vk::ShaderStageFlagBits::eFragment, + .module = key.src_msaa ? src_msaa_copy_frag : src_non_msaa_copy_frag, + .pName = "main", + }; + + const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = { + .colorAttachmentCount = 1u, + .pColorAttachmentFormats = &key.attachment_format, + .depthAttachmentFormat = vk::Format::eUndefined, + .stencilAttachmentFormat = vk::Format::eUndefined, + }; + + const vk::PipelineColorBlendAttachmentState attachment = { + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .logicOp = vk::LogicOp::eCopy, + .attachmentCount = 1u, + .pAttachments = &attachment, + }; + const vk::PipelineViewportStateCreateInfo viewport_info{}; + const vk::PipelineVertexInputStateCreateInfo vertex_input_info{}; + const vk::PipelineRasterizationStateCreateInfo raster_state{.lineWidth = 1.f}; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .pNext = &pipeline_rendering_ci, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_state, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = *single_texture_pl_layout, + }; + + auto [pipeline_result, pipeline] = + instance.GetDevice().createGraphicsPipelineUnique(VK_NULL_HANDLE, pipeline_info); + ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}", + vk::to_string(pipeline_result)); + Vulkan::SetObjectName(instance.GetDevice(), *pipeline, "Non MS Image to MS Image {}", + key.num_samples); + + ms_image_copy_pl.emplace_back(key, std::move(pipeline)); +} + } // namespace VideoCore diff --git a/src/video_core/texture_cache/blit_helper.h b/src/video_core/texture_cache/blit_helper.h index 8c506bd0b..58d4553ff 100644 --- a/src/video_core/texture_cache/blit_helper.h +++ b/src/video_core/texture_cache/blit_helper.h @@ -17,6 +17,7 @@ namespace VideoCore { class Image; class ImageView; +struct ImageInfo; class BlitHelper { static constexpr size_t MaxMsPipelines = 6; @@ -25,20 +26,26 @@ public: explicit BlitHelper(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~BlitHelper(); - void BlitColorToMsDepth(Image& source, Image& dest); + void ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples, + vk::Format src_pixel_format, vk::Format dst_pixel_format, + vk::Image source, vk::Image dest); + + void CopyBetweenMsImages(u32 width, u32 height, u32 num_samples, vk::Format pixel_format, + bool src_msaa, vk::Image source, vk::Image dest); private: void CreateShaders(); void CreatePipelineLayouts(); - struct DepthPipelineKey { + struct MsPipelineKey { u32 num_samples; - vk::Format depth_format; + vk::Format attachment_format; + bool src_msaa; - auto operator<=>(const DepthPipelineKey&) const noexcept = default; + auto operator<=>(const MsPipelineKey&) const noexcept = default; }; - vk::Pipeline GetDepthToMsPipeline(const DepthPipelineKey& key); - void CreateColorToMSDepthPipeline(const DepthPipelineKey& key); + void CreateColorToMSDepthPipeline(const MsPipelineKey& key); + void CreateMsCopyPipeline(const MsPipelineKey& key); private: const Vulkan::Instance& instance; @@ -47,9 +54,12 @@ private: vk::UniquePipelineLayout single_texture_pl_layout; vk::ShaderModule fs_tri_vertex; vk::ShaderModule color_to_ms_depth_frag; + vk::ShaderModule src_msaa_copy_frag; + vk::ShaderModule src_non_msaa_copy_frag; - using DepthPipeline = std::pair; - std::vector color_to_ms_depth_pl{}; + using MsPipeline = std::pair; + std::vector color_to_ms_depth_pl; + std::vector ms_image_copy_pl; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index a0daab362..41a66503f 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -6,6 +6,7 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/blit_helper.h" #include "video_core/texture_cache/image.h" #include @@ -75,11 +76,6 @@ static vk::FormatFeatureFlags2 FormatFeatureFlags(const vk::ImageUsageFlags usag return feature_flags; } -UniqueImage::UniqueImage() {} - -UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) - : device{device_}, allocator{allocator_} {} - UniqueImage::~UniqueImage() { if (image) { vmaDestroyImage(allocator, image, allocation); @@ -87,9 +83,8 @@ UniqueImage::~UniqueImage() { } void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) { - if (image) { - vmaDestroyImage(allocator, image, allocation); - } + this->image_ci = image_ci; + ASSERT(!image); const VmaAllocationCreateInfo alloc_info = { .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, @@ -109,9 +104,10 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) { } Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, + BlitHelper& blit_helper_, Common::SlotVector& slot_image_views_, const ImageInfo& info_) - : instance{&instance_}, scheduler{&scheduler_}, info{info_}, - image{instance->GetDevice(), instance->GetAllocator()} { + : instance{&instance_}, scheduler{&scheduler_}, blit_helper{&blit_helper_}, + slot_image_views{&slot_image_views_}, info{info_} { if (info.pixel_format == vk::Format::eUndefined) { return; } @@ -130,20 +126,11 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, usage_flags = ImageUsageFlags(instance, info); format_features = FormatFeatureFlags(usage_flags); - - switch (info.pixel_format) { - case vk::Format::eD16Unorm: - case vk::Format::eD32Sfloat: - case vk::Format::eX8D24UnormPack32: + if (info.props.is_depth) { aspect_mask = vk::ImageAspectFlagBits::eDepth; - break; - case vk::Format::eD16UnormS8Uint: - case vk::Format::eD24UnormS8Uint: - case vk::Format::eD32SfloatS8Uint: - aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - break; - default: - break; + if (info.props.has_stencil) { + aspect_mask |= vk::ImageAspectFlagBits::eStencil; + } } constexpr auto tiling = vk::ImageTiling::eOptimal; @@ -162,10 +149,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, vk::to_string(supported_format), vk::to_string(format_info.type), vk::to_string(format_info.flags), vk::to_string(format_info.usage)); } - const auto supported_samples = - image_format_properties.result == vk::Result::eSuccess - ? image_format_properties.value.imageFormatProperties.sampleCounts - : vk::SampleCountFlagBits::e1; + supported_samples = image_format_properties.result == vk::Result::eSuccess + ? image_format_properties.value.imageFormatProperties.sampleCounts + : vk::SampleCountFlagBits::e1; const vk::ImageCreateInfo image_ci = { .flags = flags, @@ -184,22 +170,48 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, .initialLayout = vk::ImageLayout::eUndefined, }; - image.Create(image_ci); + backing = &backing_images.emplace_back(); + backing->num_samples = info.num_samples; + backing->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()}; + backing->image.Create(image_ci); - Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {} {:#x}:{:#x}", - info.size.width, info.size.height, info.size.depth, - AmdGpu::NameOf(info.tile_mode), info.guest_address, info.guest_size); + Vulkan::SetObjectName(instance->GetDevice(), GetImage(), + "Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{}", info.size.width, + info.size.height, info.size.depth, AmdGpu::NameOf(info.tile_mode), + vk::to_string(info.pixel_format), info.guest_address, info.guest_size, + info.resources.layers, info.resources.levels, info.num_samples); } -boost::container::small_vector Image::GetBarriers( - vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::PipelineStageFlags2 dst_stage, std::optional subres_range) { +Image::~Image() = default; + +ImageView& Image::FindView(const ImageViewInfo& view_info, bool ensure_guest_samples) { + if (ensure_guest_samples && backing->num_samples > 1 != info.num_samples > 1) { + SetBackingSamples(info.num_samples); + } + const auto& view_infos = backing->image_view_infos; + const auto it = std::ranges::find(view_infos, view_info); + if (it != view_infos.end()) { + const auto view_id = backing->image_view_ids[std::distance(view_infos.begin(), it)]; + return (*slot_image_views)[view_id]; + } + const auto view_id = slot_image_views->insert(*instance, view_info, *this); + backing->image_view_infos.emplace_back(view_info); + backing->image_view_ids.emplace_back(view_id); + return (*slot_image_views)[view_id]; +} + +Image::Barriers Image::GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask, + vk::PipelineStageFlags2 dst_stage, + std::optional subres_range) { + auto& last_state = backing->state; + auto& subresource_states = backing->subresource_states; + const bool needs_partial_transition = subres_range && (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources); const bool partially_transited = !subresource_states.empty(); - boost::container::small_vector barriers{}; + Barriers barriers; if (needs_partial_transition || partially_transited) { if (!partially_transited) { subresource_states.resize(info.resources.levels * info.resources.layers); @@ -238,7 +250,7 @@ boost::container::small_vector Image::GetBarriers( .newLayout = dst_layout, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, + .image = GetImage(), .subresourceRange{ .aspectMask = aspect_mask, .baseMipLevel = mip, @@ -271,7 +283,7 @@ boost::container::small_vector Image::GetBarriers( .newLayout = dst_layout, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, + .image = GetImage(), .subresourceRange{ .aspectMask = aspect_mask, .baseMipLevel = 0, @@ -289,7 +301,7 @@ boost::container::small_vector Image::GetBarriers( return barriers; } -void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, +void Image::Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask, std::optional range, vk::CommandBuffer cmdbuf /*= {}*/) { // Adjust pipieline stage const vk::PipelineStageFlags2 dst_pl_stage = @@ -314,33 +326,91 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags d }); } -void Image::Upload(vk::Buffer buffer, u64 offset) { +void Image::Upload(std::span upload_copies, vk::Buffer buffer, + u64 offset) { + SetBackingSamples(info.num_samples, false); scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); - // Copy to the image. - const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil - ? vk::ImageAspectFlagBits::eDepth - : aspect_mask; - const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = info.pitch, - .bufferImageHeight = info.size.height, - .imageSubresource{ - .aspectMask = aspect, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {info.size.width, info.size.height, 1}, + const vk::BufferMemoryBarrier2 pre_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = buffer, + .offset = offset, + .size = info.guest_size, }; - + const vk::BufferMemoryBarrier2 post_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer, + .offset = offset, + .size = info.guest_size, + }; + const auto image_barriers = + GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, + vk::PipelineStageFlagBits2::eCopy, {}); const auto cmdbuf = scheduler->CommandBuffer(); - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); + cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal, + upload_copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); + flags &= ~ImageFlagBits::Dirty; +} - Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); +void Image::Download(std::span download_copies, vk::Buffer buffer, + u64 offset, u64 download_size) { + SetBackingSamples(info.num_samples); + scheduler->EndRendering(); + + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eCopy, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer, + .offset = offset, + .size = download_size, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eCopy, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer, + .offset = offset, + .size = download_size, + }; + const auto image_barriers = + GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eCopy, {}); + auto cmdbuf = scheduler->CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); + cmdbuf.copyImageToBuffer(GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer, + download_copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } void Image::CopyImage(Image& src_image) { @@ -353,6 +423,9 @@ void Image::CopyImage(Image& src_image) { const u32 depth = info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth; + SetBackingSamples(info.num_samples, false); + src_image.SetBackingSamples(src_info.num_samples); + boost::container::small_vector image_copies; for (u32 mip = 0; mip < num_mips; ++mip) { const auto mip_w = std::max(width >> mip, 1u); @@ -381,8 +454,8 @@ void Image::CopyImage(Image& src_image) { Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); - cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout, - image_copies); + cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(), + backing->state.layout, image_copies); Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); @@ -393,6 +466,9 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels); ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1); + SetBackingSamples(info.num_samples, false); + src_image.SetBackingSamples(src_info.num_samples); + boost::container::small_vector buffer_copies; for (u32 mip = 0; mip < num_mips; ++mip) { const auto mip_w = std::max(src_info.size.width >> mip, 1u); @@ -445,7 +521,7 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) .pBufferMemoryBarriers = &pre_copy_barrier, }); - cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer, + cmdbuf.copyImageToBuffer(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer, buffer_copies); cmdbuf.pipelineBarrier2(vk::DependencyInfo{ @@ -458,15 +534,11 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) copy.imageSubresource.aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil; } - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, buffer_copies); + cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal, + buffer_copies); } -void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) { - scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); - - auto cmdbuf = scheduler->CommandBuffer(); - +void Image::CopyMip(Image& src_image, u32 mip, u32 slice) { const auto mip_w = std::max(info.size.width >> mip, 1u); const auto mip_h = std::max(info.size.height >> mip, 1u); const auto mip_d = std::max(info.size.depth >> mip, 1u); @@ -491,13 +563,166 @@ void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) { }, .extent = {mip_w, mip_h, mip_d}, }; - cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout, - image_copy); - Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); + SetBackingSamples(info.num_samples); + src_image.SetBackingSamples(src_info.num_samples); + + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + + const auto cmdbuf = scheduler->CommandBuffer(); + cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(), + backing->state.layout, image_copy); } -Image::~Image() = default; +void Image::Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range, + const VideoCore::SubresourceRange& mrt1_range) { + SetBackingSamples(1, false); + scheduler->EndRendering(); + + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, + mrt0_range); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, mrt1_range); + + if (src_image.backing->num_samples == 1) { + const vk::ImageCopy region = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {info.size.width, info.size.height, 1}, + }; + scheduler->CommandBuffer().copyImage(src_image.GetImage(), + vk::ImageLayout::eTransferSrcOptimal, GetImage(), + vk::ImageLayout::eTransferDstOptimal, region); + } else { + const vk::ImageResolve region = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {info.size.width, info.size.height, 1}, + }; + scheduler->CommandBuffer().resolveImage(src_image.GetImage(), + vk::ImageLayout::eTransferSrcOptimal, GetImage(), + vk::ImageLayout::eTransferDstOptimal, region); + } + + flags |= VideoCore::ImageFlagBits::GpuModified; + flags &= ~VideoCore::ImageFlagBits::Dirty; +} + +void Image::Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range) { + const vk::ImageSubresourceRange vk_range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = range.base.level, + .levelCount = range.extent.levels, + .baseArrayLayer = range.base.layer, + .layerCount = range.extent.layers, + }; + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); + const auto cmdbuf = scheduler->CommandBuffer(); + cmdbuf.clearColorImage(GetImage(), vk::ImageLayout::eTransferDstOptimal, clear_value.color, + vk_range); +} + +void Image::SetBackingSamples(u32 num_samples, bool copy_backing) { + if (!backing || backing->num_samples == num_samples) { + return; + } + ASSERT_MSG(!info.props.is_depth, "Swapping samples is only valid for color images"); + BackingImage* new_backing; + auto it = std::ranges::find(backing_images, num_samples, &BackingImage::num_samples); + if (it == backing_images.end()) { + auto new_image_ci = backing->image.image_ci; + new_image_ci.samples = LiverpoolToVK::NumSamples(num_samples, supported_samples); + + new_backing = &backing_images.emplace_back(); + new_backing->num_samples = num_samples; + new_backing->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()}; + new_backing->image.Create(new_image_ci); + + Vulkan::SetObjectName(instance->GetDevice(), new_backing->image.image, + "Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{} (backing)", + info.size.width, info.size.height, info.size.depth, + AmdGpu::NameOf(info.tile_mode), vk::to_string(info.pixel_format), + info.guest_address, info.guest_size, info.resources.layers, + info.resources.levels, num_samples); + } else { + new_backing = std::addressof(*it); + } + + if (copy_backing) { + scheduler->EndRendering(); + ASSERT(info.resources.levels == 1 && info.resources.layers == 1); + + // Transition current backing to shader read layout + auto barriers = + GetBarriers(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eFragmentShader, std::nullopt); + + // Transition dest backing to color attachment layout, not caring of previous contents + constexpr auto dst_stage = vk::PipelineStageFlagBits2::eColorAttachmentOutput; + constexpr auto dst_access = vk::AccessFlagBits2::eColorAttachmentWrite; + constexpr auto dst_layout = vk::ImageLayout::eColorAttachmentOptimal; + barriers.push_back(vk::ImageMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eNone, + .dstStageMask = dst_stage, + .dstAccessMask = dst_access, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = new_backing->image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = info.resources.layers, + }, + }); + const auto cmdbuf = scheduler->CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + + // Copy between ms and non ms backing images + blit_helper->CopyBetweenMsImages( + info.size.width, info.size.height, new_backing->num_samples, info.pixel_format, + backing->num_samples > 1, backing->image, new_backing->image); + + // Update current layout in tracker to new backings layout + new_backing->state.layout = dst_layout; + new_backing->state.access_mask = dst_access; + new_backing->state.pl_stage = dst_stage; + } + + backing = new_backing; +} } // namespace VideoCore diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c30edad79..451c7757a 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -9,6 +9,7 @@ #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" +#include #include namespace Vulkan { @@ -34,8 +35,9 @@ enum ImageFlagBits : u32 { DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct UniqueImage { - explicit UniqueImage(); - explicit UniqueImage(vk::Device device, VmaAllocator allocator); + explicit UniqueImage() = default; + explicit UniqueImage(vk::Device device, VmaAllocator allocator) + : device{device}, allocator{allocator} {} ~UniqueImage(); UniqueImage(const UniqueImage&) = delete; @@ -44,11 +46,12 @@ struct UniqueImage { UniqueImage(UniqueImage&& other) : allocator{std::exchange(other.allocator, VK_NULL_HANDLE)}, allocation{std::exchange(other.allocation, VK_NULL_HANDLE)}, - image{std::exchange(other.image, VK_NULL_HANDLE)} {} + image{std::exchange(other.image, VK_NULL_HANDLE)}, image_ci{std::move(other.image_ci)} {} UniqueImage& operator=(UniqueImage&& other) { image = std::exchange(other.image, VK_NULL_HANDLE); allocator = std::exchange(other.allocator, VK_NULL_HANDLE); allocation = std::exchange(other.allocation, VK_NULL_HANDLE); + image_ci = std::move(other.image_ci); return *this; } @@ -58,17 +61,25 @@ struct UniqueImage { return image; } -private: - vk::Device device; - VmaAllocator allocator; - VmaAllocation allocation; + operator bool() const { + return image; + } + +public: + vk::Device device{}; + VmaAllocator allocator{}; + VmaAllocation allocation{}; vk::Image image{}; + vk::ImageCreateInfo image_ci{}; }; constexpr Common::SlotId NULL_IMAGE_ID{0}; +class BlitHelper; + struct Image { - Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info); + Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, BlitHelper& blit_helper, + Common::SlotVector& slot_image_views, const ImageInfo& info); ~Image(); Image(const Image&) = delete; @@ -77,94 +88,100 @@ struct Image { Image(Image&&) = default; Image& operator=(Image&&) = default; - [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { const VAddr overlap_end = overlap_cpu_addr + overlap_size; const auto image_addr = info.guest_address; const auto image_end = info.guest_address + info.guest_size; return image_addr < overlap_end && overlap_cpu_addr < image_end; } - ImageViewId FindView(const ImageViewInfo& info) const { - const auto it = std::ranges::find(image_view_infos, info); - if (it == image_view_infos.end()) { - return {}; - } - return image_view_ids[std::distance(image_view_infos.begin(), it)]; + vk::Image GetImage() const { + return backing->image.image; } - void AssociateDepth(ImageId image_id) { - depth_id = image_id; - } - - boost::container::small_vector GetBarriers( - vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::PipelineStageFlags2 dst_stage, std::optional subres_range); - void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - std::optional range, vk::CommandBuffer cmdbuf = {}); - void Upload(vk::Buffer buffer, u64 offset); - - void CopyImage(Image& src_image); - void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset); - void CopyMip(const Image& src_image, u32 mip, u32 slice); - bool IsTracked() { return track_addr != 0 && track_addr_end != 0; } bool SafeToDownload() const { - return True(flags & ImageFlagBits::GpuModified) && - False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty)); + return True(flags & ImageFlagBits::GpuModified) && False(flags & (ImageFlagBits::Dirty)); } + void AssociateDepth(ImageId image_id) { + depth_id = image_id; + } + + ImageView& FindView(const ImageViewInfo& view_info, bool ensure_guest_samples = true); + + using Barriers = boost::container::small_vector; + Barriers GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask, + vk::PipelineStageFlags2 dst_stage, + std::optional subres_range); + void Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask, + std::optional range, vk::CommandBuffer cmdbuf = {}); + void Upload(std::span upload_copies, vk::Buffer buffer, u64 offset); + void Download(std::span download_copies, vk::Buffer buffer, + u64 offset, u64 download_size); + + void CopyImage(Image& src_image); + void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset); + void CopyMip(Image& src_image, u32 mip, u32 slice); + + void Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range, + const VideoCore::SubresourceRange& mrt1_range); + void Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range); + + void SetBackingSamples(u32 num_samples, bool copy_backing = true); + +public: const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; + BlitHelper* blit_helper; + Common::SlotVector* slot_image_views; ImageInfo info; - UniqueImage image; vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor; + vk::SampleCountFlags supported_samples = vk::SampleCountFlagBits::e1; ImageFlagBits flags = ImageFlagBits::Dirty; VAddr track_addr = 0; VAddr track_addr_end = 0; - std::vector image_view_infos; - std::vector image_view_ids; ImageId depth_id{}; - u64 lru_id{}; // Resource state tracking + vk::ImageUsageFlags usage_flags; + vk::FormatFeatureFlags2 format_features; + struct State { + vk::PipelineStageFlags2 pl_stage = vk::PipelineStageFlagBits2::eAllCommands; + vk::AccessFlags2 access_mask = vk::AccessFlagBits2::eNone; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; + }; + struct BackingImage { + UniqueImage image; + State state; + std::vector subresource_states; + boost::container::small_vector image_view_infos; + boost::container::small_vector image_view_ids; + u32 num_samples; + }; + std::deque backing_images; + BackingImage* backing{}; + boost::container::static_vector mip_hashes{}; + u64 lru_id{}; + u64 tick_accessed_last{}; + u64 hash{}; + struct { u32 texture : 1; u32 storage : 1; u32 render_target : 1; u32 depth_target : 1; - u32 stencil : 1; u32 vo_surface : 1; } usage{}; - vk::ImageUsageFlags usage_flags; - vk::FormatFeatureFlags2 format_features; - struct State { - vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; - vk::Flags access_mask = vk::AccessFlagBits2::eNone; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; - }; - State last_state{}; - std::vector subresource_states{}; - boost::container::small_vector mip_hashes{}; - u64 tick_accessed_last{0}; - u64 hash{0}; struct { - union { - struct { - u32 is_bound : 1; // the image is bound to a descriptor set - u32 is_target : 1; // the image is bound as color/depth target - u32 needs_rebind : 1; // the image needs to be rebound - u32 force_general : 1; // the image needs to be used in general layout - }; - u32 raw{}; - }; - - void Reset() { - raw = 0u; - } + u32 is_bound : 1; + u32 is_target : 1; + u32 needs_rebind : 1; + u32 force_general : 1; } binding{}; }; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 47c60162b..8aa19a711 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -90,9 +90,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D; } -ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, - ImageId image_id_) - : image_id{image_id_}, info{info_} { +ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, + const Image& image) + : info{info_} { vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage_flags}; if (!info.is_storage) { usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage; @@ -113,7 +113,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info const vk::ImageViewCreateInfo image_view_ci = { .pNext = &usage_ci, - .image = image.image, + .image = image.GetImage(), .viewType = ConvertImageViewType(info.type), .format = instance.GetSupportedFormat(format, image.format_features), .components = info.mapping, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index a0bcd157a..7bdf0ee95 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -35,8 +35,7 @@ struct ImageViewInfo { struct Image; struct ImageView { - ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, - ImageId image_id); + ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, const Image& image); ~ImageView(); ImageView(const ImageView&) = delete; @@ -45,7 +44,6 @@ struct ImageView { ImageView(ImageView&&) = default; ImageView& operator=(ImageView&&) = default; - ImageId image_id; ImageViewInfo info; vk::UniqueImageView image_view; }; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index f5069f3c2..41c1ea09a 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -73,16 +73,15 @@ ImageId TextureCache::GetNullImage(const vk::Format format) { info.num_bits = 32; info.UpdateSize(); - const ImageId null_id = slot_images.insert(instance, scheduler, info); - auto& img = slot_images[null_id]; - - const vk::Image& null_image = img.image; - Vulkan::SetObjectName(instance.GetDevice(), null_image, + const ImageId null_id = + slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info); + auto& image = slot_images[null_id]; + Vulkan::SetObjectName(instance.GetDevice(), image.GetImage(), fmt::format("Null Image ({})", vk::to_string(format))); - img.flags = ImageFlagBits::Empty; - img.track_addr = img.info.guest_address; - img.track_addr_end = img.info.guest_address + img.info.guest_size; + image.flags = ImageFlagBits::Empty; + image.track_addr = image.info.guest_address; + image.track_addr_end = image.info.guest_address + image.info.guest_size; null_images.emplace(format, null_id); return null_id; @@ -124,7 +123,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, + cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, download_buffer.Handle(), image_download); { @@ -269,7 +268,8 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi if (recreate) { auto new_info = requested_info; new_info.resources = std::max(requested_info.resources, cache_image.info.resources); - const auto new_image_id = slot_images.insert(instance, scheduler, new_info); + const auto new_image_id = + slot_images.insert(instance, scheduler, blit_helper, slot_image_views, new_info); RegisterImage(new_image_id); // Inherit image usage @@ -290,7 +290,14 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi } else if (cache_image.info.num_samples == 1 && new_info.props.is_depth && new_info.num_samples > 1) { // Perform a rendering pass to transfer the channels of source as samples in dest. - blit_helper.BlitColorToMsDepth(cache_image, new_image); + cache_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, + vk::AccessFlagBits2::eShaderRead, {}); + new_image.Transit(vk::ImageLayout::eDepthAttachmentOptimal, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {}); + blit_helper.ReinterpretColorAsMsDepth( + new_info.size.width, new_info.size.height, new_info.num_samples, + cache_image.info.pixel_format, new_info.pixel_format, cache_image.GetImage(), + new_image.GetImage()); } else { LOG_WARNING(Render_Vulkan, "Unimplemented depth overlap copy"); } @@ -308,15 +315,16 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag BindingType binding, ImageId cache_image_id, ImageId merged_image_id) { - auto& tex_cache_image = slot_images[cache_image_id]; - // We can assume it is safe to delete the image if it wasn't accessed in some number of frames. + auto& cache_image = slot_images[cache_image_id]; const bool safe_to_delete = - scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval; + scheduler.CurrentTick() - cache_image.tick_accessed_last > NumFramesBeforeRemoval; - if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address - if (image_info.BlockDim() != tex_cache_image.info.BlockDim() || - image_info.num_bits * image_info.num_samples != - tex_cache_image.info.num_bits * tex_cache_image.info.num_samples) { + // Equal address + if (image_info.guest_address == cache_image.info.guest_address) { + const u32 lhs_block_size = image_info.num_bits * image_info.num_samples; + const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples; + if (image_info.BlockDim() != cache_image.info.BlockDim() || + lhs_block_size != rhs_block_size) { // Very likely this kind of overlap is caused by allocation from a pool. if (safe_to_delete) { FreeImage(cache_image_id); @@ -329,19 +337,19 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag } // Compressed view of uncompressed image with same block size. - if (image_info.props.is_block && !tex_cache_image.info.props.is_block) { + if (image_info.props.is_block && !cache_image.info.props.is_block) { return {ExpandImage(image_info, cache_image_id), -1, -1}; } - if (image_info.guest_size == tex_cache_image.info.guest_size && + if (image_info.guest_size == cache_image.info.guest_size && (image_info.type == AmdGpu::ImageType::Color3D || - tex_cache_image.info.type == AmdGpu::ImageType::Color3D)) { + cache_image.info.type == AmdGpu::ImageType::Color3D)) { return {ExpandImage(image_info, cache_image_id), -1, -1}; } // Size and resources are less than or equal, use image view. - if (image_info.pixel_format != tex_cache_image.info.pixel_format || - image_info.guest_size <= tex_cache_image.info.guest_size) { + if (image_info.pixel_format != cache_image.info.pixel_format || + image_info.guest_size <= cache_image.info.guest_size) { auto result_id = merged_image_id ? merged_image_id : cache_image_id; const auto& result_image = slot_images[result_id]; const bool is_compatible = @@ -350,14 +358,14 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag } // Size and resources are greater, expand the image. - if (image_info.type == tex_cache_image.info.type && - image_info.resources > tex_cache_image.info.resources) { + if (image_info.type == cache_image.info.type && + image_info.resources > cache_image.info.resources) { return {ExpandImage(image_info, cache_image_id), -1, -1}; } // Size is greater but resources are not, because the tiling mode is different. // Likely the address is reused for a image with a different tiling mode. - if (image_info.tile_mode != tex_cache_image.info.tile_mode) { + if (image_info.tile_mode != cache_image.info.tile_mode) { if (safe_to_delete) { FreeImage(cache_image_id); } @@ -368,9 +376,9 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag } // Right overlap, the image requested is a possible subresource of the image from cache. - if (image_info.guest_address > tex_cache_image.info.guest_address) { - if (auto mip = image_info.MipOf(tex_cache_image.info); mip >= 0) { - if (auto slice = image_info.SliceOf(tex_cache_image.info, mip); slice >= 0) { + if (image_info.guest_address > cache_image.info.guest_address) { + if (auto mip = image_info.MipOf(cache_image.info); mip >= 0) { + if (auto slice = image_info.SliceOf(cache_image.info, mip); slice >= 0) { return {cache_image_id, mip, slice}; } } @@ -383,12 +391,12 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag return {{}, -1, -1}; } else { // Left overlap, the image from cache is a possible subresource of the image requested - if (auto mip = tex_cache_image.info.MipOf(image_info); mip >= 0) { - if (auto slice = tex_cache_image.info.SliceOf(image_info, mip); slice >= 0) { + if (auto mip = cache_image.info.MipOf(image_info); mip >= 0) { + if (auto slice = cache_image.info.SliceOf(image_info, mip); slice >= 0) { // We have a larger image created and a separate one, representing a subres of it // bound as render target. In this case we need to rebind render target. - if (tex_cache_image.binding.is_target) { - tex_cache_image.binding.needs_rebind = 1u; + if (cache_image.binding.is_target) { + cache_image.binding.needs_rebind = 1u; if (merged_image_id) { GetImage(merged_image_id).binding.is_target = 1u; } @@ -399,15 +407,8 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag // We need to have a larger, already allocated image to copy this one into if (merged_image_id) { - tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, - vk::AccessFlagBits2::eTransferRead, {}); - - const auto num_mips_to_copy = tex_cache_image.info.resources.levels; - ASSERT(num_mips_to_copy == 1); - auto& merged_image = slot_images[merged_image_id]; - merged_image.CopyMip(tex_cache_image, mip, slice); - + merged_image.CopyMip(cache_image, mip, slice); FreeImage(cache_image_id); } } @@ -418,7 +419,8 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag } ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { - const auto new_image_id = slot_images.insert(instance, scheduler, info); + const auto new_image_id = + slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info); RegisterImage(new_image_id); auto& src_image = slot_images[image_id]; @@ -507,7 +509,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) { } // Create and register a new image if (!image_id) { - image_id = slot_images.insert(instance, scheduler, info); + image_id = slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info); RegisterImage(image_id); } @@ -557,18 +559,6 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure return {}; } -ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo& view_info) { - Image& image = slot_images[image_id]; - if (const ImageViewId view_id = image.FindView(view_info); view_id) { - return slot_image_views[view_id]; - } - - const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, image_id); - image.image_view_infos.emplace_back(view_info); - image.image_view_ids.emplace_back(view_id); - return slot_image_views[view_id]; -} - ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { Image& image = slot_images[image_id]; if (desc.type == BindingType::Storage) { @@ -579,11 +569,10 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { } } UpdateImage(image_id); - return RegisterImageView(image_id, desc.view_info); + return image.FindView(desc.view_info); } -ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { - const ImageId image_id = FindImage(desc); +ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) { Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; image.usage.render_target = 1u; @@ -602,15 +591,13 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; } - return RegisterImageView(image_id, desc.view_info); + return image.FindView(desc.view_info, false); } -ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { - const ImageId image_id = FindImage(desc); +ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) { Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; image.usage.depth_target = 1u; - image.usage.stencil = image.info.props.has_stencil; UpdateImage(image_id); // Register meta data for this depth buffer @@ -635,7 +622,8 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { info.guest_address = desc.info.stencil_addr; info.guest_size = desc.info.stencil_size; info.size = desc.info.size; - stencil_id = slot_images.insert(instance, scheduler, info); + stencil_id = + slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info); RegisterImage(stencil_id); } Image& image = slot_images[stencil_id]; @@ -643,10 +631,10 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { image.AssociateDepth(image_id); } - return RegisterImageView(image_id, desc.view_info); + return image.FindView(desc.view_info, false); } -void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { +void TextureCache::RefreshImage(Image& image) { if (False(image.flags & ImageFlagBits::Dirty) || image.info.num_samples > 1) { return; } @@ -678,7 +666,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); - boost::container::small_vector image_copy{}; + boost::container::small_vector image_copies; for (u32 m = 0; m < num_mips; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); @@ -698,7 +686,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const u32 extent_width = mip_pitch ? std::min(mip_pitch, width) : width; const u32 extent_height = mip_height ? std::min(mip_height, height) : height; - image_copy.push_back({ + image_copies.push_back({ .bufferOffset = mip_offset, .bufferRowLength = mip_pitch, .bufferImageHeight = mip_height, @@ -713,21 +701,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule }); } - if (image_copy.empty()) { + if (image_copies.empty()) { image.flags &= ~ImageFlagBits::Dirty; return; } - auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler; - sched_ptr->EndRendering(); + scheduler.EndRendering(); - const VAddr image_addr = image.info.guest_address; - const size_t image_size = image.info.guest_size; - const auto [in_buffer, in_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size); + const auto [in_buffer, in_offset] = + buffer_cache.ObtainBufferForImage(image.info.guest_address, image.info.guest_size); if (auto barrier = in_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { - const auto cmdbuf = sched_ptr->CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + scheduler.CommandBuffer().pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &barrier.value(), @@ -735,48 +720,12 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule } const auto [buffer, offset] = - !custom_scheduler ? tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info) - : std::make_pair(in_buffer->Handle(), in_offset); - for (auto& copy : image_copy) { + tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info); + for (auto& copy : image_copies) { copy.bufferOffset += offset; } - const vk::BufferMemoryBarrier2 pre_barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferRead, - .buffer = buffer, - .offset = offset, - .size = image_size, - }; - const vk::BufferMemoryBarrier2 post_barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, - .buffer = buffer, - .offset = offset, - .size = image_size, - }; - const auto image_barriers = - image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, - vk::PipelineStageFlagBits2::eTransfer, {}); - const auto cmdbuf = sched_ptr->CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &pre_barrier, - .imageMemoryBarrierCount = static_cast(image_barriers.size()), - .pImageMemoryBarriers = image_barriers.data(), - }); - cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &post_barrier, - }); - image.flags &= ~ImageFlagBits::Dirty; + image.Upload(image_copies, buffer, offset); } vk::Sampler TextureCache::GetSampler( @@ -1020,8 +969,10 @@ void TextureCache::DeleteImage(ImageId image_id) { // Reclaim image and any image views it references. scheduler.DeferOperation([this, image_id] { Image& image = slot_images[image_id]; - for (const ImageViewId image_view_id : image.image_view_ids) { - slot_image_views.erase(image_view_id); + for (auto& backing : image.backing_images) { + for (const ImageViewId image_view_id : backing.image_view_ids) { + slot_image_views.erase(image_view_id); + } } slot_images.erase(image_id); }); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6edbadbf9..4bedea39d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -67,12 +67,14 @@ public: }; struct RenderTargetDesc : public BaseDesc { + RenderTargetDesc() = default; RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint = {}) : BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {} }; struct DepthTargetDesc : public BaseDesc { + DepthTargetDesc() = default; DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer, const AmdGpu::Liverpool::DepthView& view, const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address, @@ -118,20 +120,21 @@ public: [[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc); + [[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc); /// Retrieves the depth target with specified properties - [[nodiscard]] ImageView& FindDepthTarget(BaseDesc& desc); + [[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc); /// Updates image contents if it was modified by CPU. - void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { + void UpdateImage(ImageId image_id) { std::scoped_lock lock{mutex}; Image& image = slot_images[image_id]; TrackImage(image_id); TouchImage(image); - RefreshImage(image, custom_scheduler); + RefreshImage(image); } + /// Resolves overlap between existing cache image and pending merged image [[nodiscard]] std::tuple ResolveOverlap(const ImageInfo& info, BindingType binding, ImageId cache_img_id, @@ -145,7 +148,7 @@ public: [[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id); /// Reuploads image contents. - void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); + void RefreshImage(Image& image); /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler( @@ -161,16 +164,9 @@ public: /// Retrieves the image view with the specified id. [[nodiscard]] ImageView& GetImageView(ImageId id) { - auto& view = slot_image_views[id]; - // Maybe this is not needed. - Image& image = slot_images[view.image_id]; - TouchImage(image); - return view; + return slot_image_views[id]; } - /// Registers an image view for provided image - ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); - /// Returns true if the specified address is a metadata surface. bool IsMeta(VAddr address) const { return surface_metas.contains(address); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 5154dad46..d79bb315a 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -5,6 +5,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" @@ -190,6 +191,8 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset vmaDestroyBuffer(instance.GetAllocator(), out_buffer, out_allocation); }); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, false)); @@ -238,15 +241,14 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset return {out_buffer, 0}; } -void TileManager::TileImage(vk::Image in_image, std::span buffer_copies, - vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info) { +void TileManager::TileImage(Image& in_image, std::span buffer_copies, + vk::Buffer out_buffer, u32 out_offset, u32 copy_size) { + const auto& info = in_image.info; if (!info.props.is_tiled) { for (auto& copy : buffer_copies) { copy.bufferOffset += out_offset; } - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, out_buffer, - buffer_copies); + in_image.Download(buffer_copies, out_buffer, out_offset, copy_size); return; } @@ -275,8 +277,8 @@ void TileManager::TileImage(vk::Image in_image, std::span b }); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, temp_buffer, - buffer_copies); + in_image.Download(buffer_copies, temp_buffer, 0, copy_size); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, true)); const vk::DescriptorBufferInfo tiled_buffer_info{ diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index dc897a31e..6508fd252 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -10,6 +10,7 @@ namespace VideoCore { struct ImageInfo; +struct Image; class StreamBuffer; class TileManager { @@ -23,8 +24,8 @@ public: StreamBuffer& stream_buffer); ~TileManager(); - void TileImage(vk::Image in_image, std::span buffer_copies, - vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info); + void TileImage(Image& in_image, std::span buffer_copies, + vk::Buffer out_buffer, u32 out_offset, u32 copy_size); Result DetileImage(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info);