Handle mixed samples attachments (V2) (#3667)

* video_core: Refactor render target bind to allow disabling MSAA

* video_core: Implement swapping of backing samples

* clang format

* video_core: Better implementation

Instead of downgrading to 1 sample, always try to match the depth sample count. This avoids having to copy the depth-stencil attachment; copying multisampled stencil is not possible on some vendors anyway (see the sketch below).
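
A minimal sketch of the policy, with hypothetical names that are not the emulator's actual API:

#include <cstdint>

// Pick the backing sample count for a color target. When the driver cannot
// mix attachment sample counts, back the color target with the depth sample
// count instead of downgrading everything to 1 sample, so the depth-stencil
// attachment never needs to be copied.
std::uint8_t PickBackingSamples(std::uint8_t color_samples, std::uint8_t depth_samples,
                                bool mixed_samples_supported) {
    return mixed_samples_supported ? color_samples : depth_samples;
}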

* video_core: Small bugfixes

* image: Add null check

* vk_rasterizer: Swap backing samples on resolve dst

* vk_presenter: Reset backing samples before present

* video_core: Small refactor to make this implementation better

* reinterpret: Fix channel check for degamma

Seems this was simpler than I thought: hardware doesn't apply degamma to the W channel, regardless of swizzle (see the sketch below).
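
A minimal C++ restatement of the rule, using the standard sRGB EOTF (the same curve as ApplyGammaToLinear in the diff below); the helper names are illustrative:

#include <cmath>

// Standard sRGB gamma -> linear conversion (threshold 0.04045, exponent 2.4).
float SrgbToLinear(float c) {
    return c <= 0.04045f ? c / 12.92f : std::pow((c + 0.055f) / 1.055f, 2.4f);
}

// Forced degamma touches only the RGB components; the W channel passes
// through untouched, regardless of the image's component swizzle.
void ForceDegamma(float rgba[4]) {
    for (int i = 0; i < 3; ++i) {
        rgba[i] = SrgbToLinear(rgba[i]);
    }
}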

* image: Add missing end rendering call

* blit_helper: Fix bug in old reinterpret path

* blit_helper: Remove unused layer vertex

It can be brought back in the future if copying many layers is needed.

* vk_rasterizer: Apply suggestion

* vk_rasterizer: More bind refactor

* vk_instance: Re-enable extensions
This commit is contained in:
TheTurtle
2025-09-29 16:27:39 +03:00
committed by GitHub
parent cad027845f
commit a35c9f3586
32 changed files with 1166 additions and 847 deletions

View File

@@ -233,13 +233,8 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
}
if (!is_eop) {
// Before processing the flip we need to ask the GPU thread to flush the command list, as at
// this point the VO surface is ready to be presented and we need the actual state of the
// Vulkan image at the time of frame presentation.
liverpool->SendCommand([=, this]() {
presenter->FlushDraw();
SubmitFlipInternal(port, index, flip_arg, is_eop);
});
// Non-EOP flips can arrive from any thread, so ask the GPU thread to perform them
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
} else {
SubmitFlipInternal(port, index, flip_arg, is_eop);
}
@@ -247,15 +242,14 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
return true;
}
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
Vulkan::Frame* frame;
if (index == -1) {
frame = presenter->PrepareBlankFrame(is_eop);
frame = presenter->PrepareBlankFrame(false);
} else {
const auto& buffer = port->buffer_slots[index];
const auto& group = port->groups[buffer.group_index];
frame = presenter->PrepareFrame(group, buffer.address_left, is_eop);
frame = presenter->PrepareFrame(group, buffer.address_left);
}
std::scoped_lock lock{mutex};

View File

@@ -301,7 +301,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
if (runtime_info.fs_info.addr_flags.linear_sample_ena ||
if (info.loads.Get(IR::Attribute::SampleIndex) ||
runtime_info.fs_info.addr_flags.linear_sample_ena ||
runtime_info.fs_info.addr_flags.persp_sample_ena) {
ctx.AddCapability(spv::Capability::SampleRateShading);
}

View File

@@ -3,6 +3,7 @@
#pragma once
#include <optional>
#include <vector>
#include "common/types.h"
#include "shader_recompiler/info.h"

View File

@@ -1022,7 +1022,7 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) {
converted = ApplyForceDegamma(ir, texel, image.DstSelect());
converted = ApplyForceDegamma(ir, texel);
}
inst.ReplaceUsesWith(converted);
}

View File

@@ -29,25 +29,15 @@ inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) {
return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)};
}
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value,
const AmdGpu::CompMapping& mapping) {
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value) {
auto x = F32{ir.CompositeExtract(value, 0)};
auto y = F32{ir.CompositeExtract(value, 1)};
auto z = F32{ir.CompositeExtract(value, 2)};
auto w = F32{ir.CompositeExtract(value, 3)};
// Gamma correction is only applied to RGB components
if (AmdGpu::IsRgb(mapping.r)) {
x = ApplyGammaToLinear(ir, x);
}
if (AmdGpu::IsRgb(mapping.g)) {
y = ApplyGammaToLinear(ir, y);
}
if (AmdGpu::IsRgb(mapping.b)) {
z = ApplyGammaToLinear(ir, z);
}
if (AmdGpu::IsRgb(mapping.a)) {
w = ApplyGammaToLinear(ir, w);
}
x = ApplyGammaToLinear(ir, x);
y = ApplyGammaToLinear(ir, y);
z = ApplyGammaToLinear(ir, z);
return ir.CompositeConstruct(x, y, z, w);
}

View File

@@ -1485,26 +1485,6 @@ struct Liverpool {
return nullptr;
}
u32 NumSamples() const {
// It seems that the number of samples > 1 set in the AA config doesn't mean we're
// always rendering with MSAA, so we need to derive MS ratio from the CB and DB
// settings.
u32 num_samples = 1u;
if (color_control.mode != ColorControl::OperationMode::Disable) {
for (auto cb = 0u; cb < NumColorBuffers; ++cb) {
const auto& col_buf = color_buffers[cb];
if (!col_buf) {
continue;
}
num_samples = std::max(num_samples, col_buf.NumSamples());
}
}
if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) {
num_samples = std::max(num_samples, depth_buffer.NumSamples());
}
return num_samples;
}
bool IsClipDisabled() const {
return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList;
}

View File

@@ -984,44 +984,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
if (copy_size == 0) {
return false;
}
scheduler.EndRendering();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = buf_offset,
.size = copy_size,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer.Handle(),
.offset = buf_offset,
.size = copy_size,
};
auto barriers =
image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
vk::PipelineStageFlagBits2::eTransfer, {});
auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
.pImageMemoryBarriers = barriers.data(),
});
auto& tile_manager = texture_cache.GetTileManager();
tile_manager.TileImage(image.image, buffer_copies, buffer.Handle(), buf_offset, image.info);
cmdbuf = scheduler.CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
tile_manager.TileImage(image, buffer_copies, buffer.Handle(), buf_offset, copy_size);
return true;
}

View File

@@ -12,6 +12,7 @@ set(SHADER_FILES
detilers/micro_64bpp.comp
detilers/micro_8bpp.comp
color_to_ms_depth.frag
ms_image_blit.frag
fault_buffer_process.comp
fs_tri.vert
fsr.comp

View File

@@ -3,6 +3,10 @@
#version 450
#if defined(INSTANCE_AS_LAYER)
#extension GL_ARB_shader_viewport_layer_array : require
#endif
layout(location = 0) out vec2 uv;
void main() {
@@ -11,5 +15,8 @@ void main() {
float((gl_VertexIndex & 2u) << 1u)
);
gl_Position = vec4(pos - vec2(1.0, 1.0), 0.0, 1.0);
#if defined(INSTANCE_AS_LAYER)
gl_Layer = gl_InstanceIndex;
#endif
uv = pos * 0.5;
}

View File

@@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
#if defined(SRC_MSAA)
layout (binding = 0, set = 0) uniform texture2DMS in_tex;
#else
layout (binding = 0, set = 0) uniform texture2D in_tex;
#endif
layout (location = 0) in vec2 uv;
layout (location = 0) out vec4 out_color;
void main()
{
#if defined(SRC_MSAA)
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), gl_SampleID);
#else
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), 0);
#endif
}

View File

@@ -1,10 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "fsr_pass.h"
#include "common/assert.h"
#include "common/config.h"
#include "video_core/host_shaders/fsr_comp.h"
#include "video_core/renderer_vulkan/host_passes/fsr_pass.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -164,6 +164,12 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
CreateImages(img);
}
if (Config::getVkHostMarkersEnabled()) {
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = "Host/FSR",
});
}
static const int thread_group_work_region_dim = 16;
int dispatch_x = (width + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim;
int dispatch_y = (height + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim;
@@ -381,6 +387,10 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
.pImageMemoryBarriers = return_barrier.data(),
});
if (Config::getVkHostMarkersEnabled()) {
cmdbuf.endDebugUtilsLabelEXT();
}
return img.output_image_view.get();
}
@@ -442,4 +452,4 @@ void FsrPass::CreateImages(Img& img) const {
SetObjectName(device, img.output_image_view.get(), "FSR Output ImageView #{}", img.id);
}
} // namespace Vulkan::HostPasses

View File

@@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "pp_pass.h"
#include "video_core/renderer_vulkan/host_passes/pp_pass.h"
#include "common/assert.h"
#include "common/config.h"
#include "video_core/host_shaders/fs_tri_vert.h"
#include "video_core/host_shaders/post_process_frag.h"
#include "video_core/renderer_vulkan/vk_platform.h"
@@ -187,6 +188,17 @@ void PostProcessingPass::Create(vk::Device device, const vk::Format surface_form
void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
vk::Extent2D input_size, Frame& frame, Settings settings) {
if (Config::getVkHostMarkersEnabled()) {
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = "Host/Post processing",
});
}
constexpr vk::ImageSubresourceRange simple_subresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
};
const std::array<vk::RenderingAttachmentInfo, 1> attachments{{
{
.imageView = frame.image_view,
@@ -250,6 +262,26 @@ void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
cmdbuf.beginRendering(rendering_info);
cmdbuf.draw(3, 1, 0, 0);
cmdbuf.endRendering();
const auto post_barrier = vk::ImageMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.image = frame.image,
.subresourceRange = simple_subresource,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &post_barrier,
});
if (Config::getVkHostMarkersEnabled()) {
cmdbuf.endDebugUtilsLabelEXT();
}
}
} // namespace Vulkan::HostPasses

View File

@@ -135,8 +135,8 @@ GraphicsPipeline::GraphicsPipeline(
}
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples =
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
.rasterizationSamples = LiverpoolToVK::NumSamples(
key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()),
.sampleShadingEnable =
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
};
@@ -259,7 +259,20 @@ GraphicsPipeline::GraphicsPipeline(
color_formats[i] = color_format;
}
std::array<vk::SampleCountFlagBits, Liverpool::NumColorBuffers> color_samples;
std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) {
return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts())
: vk::SampleCountFlagBits::e1;
});
const vk::AttachmentSampleCountInfoAMD mixed_samples = {
.colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentSamples = color_samples.data(),
.depthStencilAttachmentSamples =
LiverpoolToVK::NumSamples(key.depth_samples, instance.GetDepthSampleCounts()),
};
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
.pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr,
.colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentFormats = color_formats.data(),
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid

View File

@@ -41,7 +41,9 @@ struct GraphicsPipelineKey {
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
Liverpool::ColorBufferMask cb_shader_mask;
Liverpool::ColorControl::LogicOp logic_op;
u32 num_samples;
u8 num_samples;
u8 depth_samples;
std::array<u8, Liverpool::NumColorBuffers> color_samples;
u32 mrt_mask;
struct {
Liverpool::DepthBuffer::ZFormat z_format : 2;
@@ -80,12 +82,8 @@ public:
return fetch_shader;
}
auto GetWriteMasks() const {
return key.write_masks;
}
u32 GetMrtMask() const {
return key.mrt_mask;
const GraphicsPipelineKey& GetGraphicsKey() const {
return key;
}
/// Gets the attributes and bindings for vertex inputs.

View File

@@ -297,6 +297,8 @@ bool Instance::CreateDevice() {
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
nv_framebuffer_mixed_samples = add_extension(VK_NV_FRAMEBUFFER_MIXED_SAMPLES_EXTENSION_NAME);
amd_mixed_attachment_samples = add_extension(VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME);
shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
if (shader_atomic_float2) {
shader_atomic_float2_features =

View File

@@ -239,6 +239,17 @@ public:
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
}
/// Returns true if VK_NV_framebuffer_mixed_samples or
/// VK_AMD_mixed_attachment_samples is supported
bool IsMixedDepthSamplesSupported() const {
return nv_framebuffer_mixed_samples || amd_mixed_attachment_samples;
}
/// Returns true if VK_AMD_mixed_attachment_samples is supported
bool IsMixedAnySamplesSupported() const {
return amd_mixed_attachment_samples;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@@ -389,10 +400,14 @@ public:
return properties.limits.maxFramebufferHeight;
}
/// Returns the sample count flags supported by framebuffers.
vk::SampleCountFlags GetFramebufferSampleCounts() const {
return properties.limits.framebufferColorSampleCounts &
properties.limits.framebufferDepthSampleCounts &
/// Returns the sample count flags supported by color buffers.
vk::SampleCountFlags GetColorSampleCounts() const {
return properties.limits.framebufferColorSampleCounts;
}
/// Returns the sample count flags supported by depth buffer.
vk::SampleCountFlags GetDepthSampleCounts() const {
return properties.limits.framebufferDepthSampleCounts &
properties.limits.framebufferStencilSampleCounts;
}
@@ -481,6 +496,8 @@ private:
bool image_load_store_lod{};
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool nv_framebuffer_mixed_samples{};
bool amd_mixed_attachment_samples{};
bool shader_atomic_float2{};
bool workgroup_memory_explicit_layout{};
bool portability_subset{};

View File

@@ -325,6 +325,8 @@ bool PipelineCache::RefreshGraphicsKey() {
const auto& regs = liverpool->regs;
auto& key = graphics_key;
const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid();
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()
: Liverpool::DepthBuffer::ZFormat::Invalid;
key.stencil_format = regs.depth_buffer.StencilValid()
@@ -339,17 +341,17 @@ bool PipelineCache::RefreshGraphicsKey() {
key.patch_control_points =
regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0;
key.logic_op = regs.color_control.rop3;
key.num_samples = regs.NumSamples();
key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1;
key.num_samples = key.depth_samples;
key.cb_shader_mask = regs.color_shader_mask;
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
// First pass to fill render target information
// First pass to fill render target information needed by shader recompiler
for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) {
const auto& col_buf = regs.color_buffers[cb];
const u32 target_mask = regs.color_target_mask.GetMask(cb);
if (!col_buf || !target_mask) {
if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
// No attachment bound or writing to it is disabled.
continue;
}
@@ -362,6 +364,26 @@ bool PipelineCache::RefreshGraphicsKey() {
.export_format = regs.color_export_format.GetFormat(cb),
.swizzle = col_buf.Swizzle(),
};
}
// Compile and bind shader stages
if (!RefreshGraphicsStages()) {
return false;
}
// Second pass to mask out render targets not written by shader and fill remaining info
u8 color_samples = 0;
bool all_color_samples_same = true;
for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) {
const auto& col_buf = regs.color_buffers[cb];
const u32 target_mask = regs.color_target_mask.GetMask(cb);
if (!col_buf || !target_mask) {
continue;
}
if ((key.mrt_mask & (1u << cb)) == 0) {
key.color_buffers[cb] = {};
continue;
}
// Fill color blending information
if (regs.blend_control[cb].enable && !col_buf.info.blend_bypass) {
@@ -371,22 +393,21 @@ bool PipelineCache::RefreshGraphicsKey() {
// Apply swizzle to target mask
key.write_masks[cb] =
vk::ColorComponentFlags{key.color_buffers[cb].swizzle.ApplyMask(target_mask)};
// Fill color samples
const u8 prev_color_samples = std::exchange(color_samples, col_buf.NumSamples());
all_color_samples_same &= color_samples == prev_color_samples || prev_color_samples == 0;
key.color_samples[cb] = color_samples;
key.num_samples = std::max(key.num_samples, color_samples);
}
// Compile and bind shader stages
if (!RefreshGraphicsStages()) {
return false;
}
// Second pass to mask out render targets not written by fragment shader
for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) {
const auto& col_buf = regs.color_buffers[cb];
if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
continue;
}
if ((key.mrt_mask & (1u << cb)) == 0) {
// Attachment is bound and mask allows writes but shader does not output to it.
key.color_buffers[cb] = {};
// Force all color samples to match depth samples to avoid unsupported MSAA configuration
if (color_samples != 0) {
const bool depth_mismatch = db_enabled && color_samples != key.depth_samples;
if (!all_color_samples_same && !instance.IsMixedAnySamplesSupported() ||
all_color_samples_same && depth_mismatch && !instance.IsMixedDepthSamplesSupported()) {
key.color_samples.fill(key.depth_samples);
key.num_samples = key.depth_samples;
}
}
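
For readability, the fallback condition above with explicit parentheses (&& binds tighter than ||):

// Equivalent restatement of the fallback check in RefreshGraphicsKey.
const bool needs_fallback =
    // Color attachments disagree with each other and mixing arbitrary
    // attachment sample counts is unsupported...
    (!all_color_samples_same && !instance.IsMixedAnySamplesSupported()) ||
    // ...or they agree but differ from the depth sample count and mixed
    // depth/color samples are unsupported.
    (all_color_samples_same && depth_mismatch && !instance.IsMixedDepthSamplesSupported());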

View File

@@ -3,26 +3,21 @@
#include "common/config.h"
#include "common/debug.h"
#include "common/elf_info.h"
#include "common/singleton.h"
#include "core/debug_state.h"
#include "core/devtools/layer.h"
#include "core/libraries/system/systemservice.h"
#include "imgui/renderer/imgui_core.h"
#include "imgui/renderer/imgui_impl_vulkan.h"
#include "sdl_window.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_presenter.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/image.h"
#include "video_core/host_shaders/fs_tri_vert.h"
#include <vk_mem_alloc.h>
#include <imgui.h>
#include "common/elf_info.h"
#include "imgui/renderer/imgui_impl_vulkan.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
@@ -291,26 +286,14 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form
return {};
}
Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id,
const Libraries::VideoOut::PixelFormat format, bool is_eop) {
// Request a free presentation frame.
Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) {
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
texture_cache.UpdateImage(image_id);
Frame* frame = GetRenderFrame();
// EOP flips are triggered from GPU thread so use the drawing scheduler to record
// commands. Otherwise we are dealing with a CPU flip which could have arrived
// from any guest thread. Use a separate scheduler for that.
auto& scheduler = is_eop ? draw_scheduler : flip_scheduler;
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
bool vk_host_markers_enabled = Config::getVkHostMarkersEnabled();
if (vk_host_markers_enabled) {
const auto label = fmt::format("PrepareFrameInternal:{}", image_id.index);
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = label.c_str(),
});
}
const auto frame_subresources = vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
@@ -319,111 +302,116 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
};
const auto pre_barrier =
vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
.image = frame->image,
.subresourceRange{frame_subresources}};
const auto pre_barrier = vk::ImageMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
.image = frame->image,
.subresourceRange{frame_subresources},
};
draw_scheduler.EndRendering();
const auto cmdbuf = draw_scheduler.CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &pre_barrier,
});
VideoCore::ImageViewInfo view_info{};
view_info.format = GetFrameViewFormat(attribute.attrib.pixel_format);
// Exclude alpha from output frame to avoid blending with UI.
view_info.mapping.a = vk::ComponentSwizzle::eOne;
auto& image = texture_cache.GetImage(image_id);
auto image_view = *image.FindView(view_info).image_view;
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
const vk::Extent2D image_size = {image.info.size.width, image.info.size.height};
expected_ratio = static_cast<float>(image_size.width) / static_cast<float>(image_size.height);
image_view = fsr_pass.Render(cmdbuf, image_view, image_size, {frame->width, frame->height},
fsr_settings, frame->is_hdr);
pp_pass.Render(cmdbuf, image_view, image_size, *frame, pp_settings);
DebugState.game_resolution = {image_size.width, image_size.height};
DebugState.output_resolution = {frame->width, frame->height};
// Flush frame creation commands.
frame->ready_semaphore = draw_scheduler.GetMasterSemaphore()->Handle();
frame->ready_tick = draw_scheduler.CurrentTick();
SubmitInfo info{};
draw_scheduler.Flush(info);
return frame;
}
Frame* Presenter::PrepareBlankFrame(bool present_thread) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();
auto& scheduler = present_thread ? present_scheduler : draw_scheduler;
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
constexpr vk::ImageSubresourceRange simple_subresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
};
const auto pre_barrier = vk::ImageMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
.image = frame->image,
.subresourceRange = simple_subresource,
};
const auto post_barrier = vk::ImageMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.image = frame->image,
.subresourceRange = simple_subresource,
};
const vk::RenderingAttachmentInfo attachment = {
.imageView = frame->image_view,
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
.loadOp = vk::AttachmentLoadOp::eClear,
.storeOp = vk::AttachmentStoreOp::eStore,
};
const vk::RenderingInfo rendering_info = {
.renderArea =
{
.extent = {frame->width, frame->height},
},
.layerCount = 1,
.colorAttachmentCount = 1u,
.pColorAttachments = &attachment,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &pre_barrier,
});
if (image_id != VideoCore::NULL_IMAGE_ID) {
auto& image = texture_cache.GetImage(image_id);
vk::Extent2D image_size = {image.info.size.width, image.info.size.height};
float ratio = (float)image_size.width / (float)image_size.height;
if (ratio != expected_ratio) {
expected_ratio = ratio;
}
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {},
cmdbuf);
VideoCore::ImageViewInfo info{};
info.format = GetFrameViewFormat(format);
// Exclude alpha from output frame to avoid blending with UI.
info.mapping = vk::ComponentMapping{
.r = vk::ComponentSwizzle::eIdentity,
.g = vk::ComponentSwizzle::eIdentity,
.b = vk::ComponentSwizzle::eIdentity,
.a = vk::ComponentSwizzle::eOne,
};
vk::ImageView imageView;
if (auto view = image.FindView(info)) {
imageView = *texture_cache.GetImageView(view).image_view;
} else {
imageView = *texture_cache.RegisterImageView(image_id, info).image_view;
}
if (vk_host_markers_enabled) {
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = "Host/FSR",
});
}
imageView = fsr_pass.Render(cmdbuf, imageView, image_size, {frame->width, frame->height},
fsr_settings, frame->is_hdr);
if (vk_host_markers_enabled) {
cmdbuf.endDebugUtilsLabelEXT();
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
.pLabelName = "Host/Post processing",
});
}
pp_pass.Render(cmdbuf, imageView, image_size, *frame, pp_settings);
if (vk_host_markers_enabled) {
cmdbuf.endDebugUtilsLabelEXT();
}
DebugState.game_resolution = {image_size.width, image_size.height};
DebugState.output_resolution = {frame->width, frame->height};
} else {
// Fix display of garbage images on startup on some drivers
const std::array<vk::RenderingAttachmentInfo, 1> attachments = {{
{
.imageView = frame->image_view,
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
.loadOp = vk::AttachmentLoadOp::eClear,
.storeOp = vk::AttachmentStoreOp::eStore,
},
}};
const vk::RenderingInfo rendering_info{
.renderArea{
.extent{frame->width, frame->height},
},
.layerCount = 1,
.colorAttachmentCount = attachments.size(),
.pColorAttachments = attachments.data(),
};
cmdbuf.beginRendering(rendering_info);
cmdbuf.endRendering();
}
const auto post_barrier =
vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.image = frame->image,
.subresourceRange{frame_subresources}};
cmdbuf.beginRendering(rendering_info);
cmdbuf.endRendering();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &post_barrier,
});
if (vk_host_markers_enabled) {
cmdbuf.endDebugUtilsLabelEXT();
}
// Flush frame creation commands.
frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle();
frame->ready_tick = scheduler.CurrentTick();

View File

@@ -66,44 +66,6 @@ public:
return window;
}
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address, bool is_eop) {
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop);
}
Frame* PrepareBlankFrame(bool is_eop) {
return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID,
Libraries::VideoOut::PixelFormat::Unknown, is_eop);
}
VideoCore::Image& RegisterVideoOutSurface(
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id);
image.usage.vo_surface = 1u;
return image;
}
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) {
return vo_buffer == color_buffer.Address();
}) != vo_buffers_addr.cend();
}
void Present(Frame* frame, bool is_reusing_frame = false);
void RecreateFrame(Frame* frame, u32 width, u32 height);
Frame* PrepareLastFrame();
void FlushDraw() {
SubmitInfo info{};
draw_scheduler.Flush(info);
}
Rasterizer& GetRasterizer() const {
return *rasterizer.get();
}
@@ -120,11 +82,33 @@ public:
pp_settings.hdr = enable ? 1 : 0;
}
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const {
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
}
VideoCore::Image& RegisterVideoOutSurface(
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
vo_buffers_addr.emplace_back(cpu_address);
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id);
image.usage.vo_surface = 1u;
return image;
}
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address);
Frame* PrepareBlankFrame(bool present_thread);
void Present(Frame* frame, bool is_reusing_frame = false);
Frame* PrepareLastFrame();
private:
Frame* PrepareFrameInternal(VideoCore::ImageId image_id,
Libraries::VideoOut::PixelFormat format, bool is_eop = true);
Frame* GetRenderFrame();
void RecreateFrame(Frame* frame, u32 width, u32 height);
void SetExpectedGameSize(s32 width, s32 height);
private:

View File

@@ -107,128 +107,44 @@ bool Rasterizer::FilterDraw() {
return true;
}
RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
// Prefetch color and depth buffers to let texture cache handle possible overlaps with bound
// textures (e.g. mipgen)
RenderState state;
state.width = instance.GetMaxFramebufferWidth();
state.height = instance.GetMaxFramebufferHeight();
state.num_layers = std::numeric_limits<u32>::max();
state.num_color_attachments = std::bit_width(mrt_mask);
cb_descs.clear();
db_desc.reset();
void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
// Prefetch render targets to handle overlaps with bound textures (e.g. mipgen)
const auto& key = pipeline->GetGraphicsKey();
const auto& regs = liverpool->regs;
if (regs.color_control.degamma_enable) {
LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction");
}
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
for (s32 cb = 0; cb < state.num_color_attachments && !skip_cb_binding; ++cb) {
for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) {
auto& [image_id, desc] = cb_descs[cb];
const auto& col_buf = regs.color_buffers[cb];
if (!col_buf) {
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
const u32 target_mask = regs.color_target_mask.GetMask(cb);
if (skip_cb_binding || !col_buf || !target_mask || (key.mrt_mask & (1 << cb)) == 0) {
image_id = {};
continue;
}
// Skip stale color buffers if shader doesn't output to them. Otherwise it will perform
// an unnecessary transition and may result in state conflict if the resource is already
// bound for reading.
if ((mrt_mask & (1 << cb)) == 0) {
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
continue;
}
// If the color buffer is still bound but rendering to it is disabled by the target
// mask, we need to prevent the render area from being affected by unbound render target
// extents.
if (!regs.color_target_mask.GetMask(cb)) {
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
continue;
}
const auto& hint = liverpool->last_cb_extent[cb];
auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{col_buf, hint});
const auto& image_view = texture_cache.FindRenderTarget(desc);
image_id = bound_images.emplace_back(image_view.image_id);
std::construct_at(&desc, col_buf, hint);
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
auto& image = texture_cache.GetImage(image_id);
image.binding.is_target = 1u;
const auto slice = image_view.info.range.base.layer;
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice);
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
const auto mip = image_view.info.range.base.level;
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
state.color_attachments[cb] = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
};
}
if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) ||
(regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) {
const auto htile_address = regs.depth_htile_data_base.GetAddress();
const auto& hint = liverpool->last_db_extent;
auto& [image_id, desc] =
db_desc.emplace(std::piecewise_construct, std::tuple{},
std::tuple{regs.depth_buffer, regs.depth_view, regs.depth_control,
htile_address, hint});
const auto& image_view = texture_cache.FindDepthTarget(desc);
image_id = bound_images.emplace_back(image_view.image_id);
auto& [image_id, desc] = db_desc;
std::construct_at(&desc, regs.depth_buffer, regs.depth_view, regs.depth_control,
htile_address, hint);
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
auto& image = texture_cache.GetImage(image_id);
image.binding.is_target = 1u;
const auto slice = image_view.info.range.base.layer;
const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
texture_cache.IsMetaCleared(htile_address, slice);
const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
ASSERT(desc.view_info.range.extent.levels == 1);
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.has_depth = regs.depth_buffer.DepthValid();
state.has_stencil = regs.depth_buffer.StencilValid();
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
if (state.has_depth) {
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp =
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
};
}
if (state.has_stencil) {
state.stencil_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp =
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
};
}
texture_cache.TouchMeta(htile_address, slice, false);
} else {
db_desc.first = {};
}
if (state.num_layers == std::numeric_limits<u32>::max()) {
state.num_layers = 1;
}
return state;
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
@@ -253,28 +169,20 @@ void Rasterizer::EliminateFastClear() {
return;
}
VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]);
const auto& image_view = texture_cache.FindRenderTarget(desc);
const auto image_id = texture_cache.FindImage(desc);
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) {
return;
}
for (u32 slice = col_buf.view.slice_start; slice <= col_buf.view.slice_max; ++slice) {
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
}
auto& image = texture_cache.GetImage(image_view.image_id);
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = col_buf.view.slice_start,
.layerCount = col_buf.view.slice_max - col_buf.view.slice_start + 1,
};
scheduler.EndRendering();
auto& image = texture_cache.GetImage(image_id);
const auto clear_value = LiverpoolToVK::ColorBufferClearValue(col_buf);
ScopeMarkerBegin(fmt::format("EliminateFastClear:MRT={:#x}:M={:#x}", col_buf.Address(),
col_buf.CmaskAddress()));
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
scheduler.CommandBuffer().clearColorImage(image.image, image.last_state.layout,
LiverpoolToVK::ColorBufferClearValue(col_buf).color,
range);
image.Clear(clear_value, desc.view_info.range);
ScopeMarkerEnd();
}
@@ -293,18 +201,20 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return;
}
auto state = PrepareRenderState(pipeline->GetMrtMask());
PrepareRenderState(pipeline);
if (!BindResources(pipeline)) {
return;
}
const auto state = BeginRendering(pipeline);
buffer_cache.BindVertexBuffers(*pipeline);
if (is_indexed) {
buffer_cache.BindIndexBuffer(index_offset);
}
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline, is_indexed);
pipeline->BindResources(set_writes, buffer_barriers, push_data);
UpdateDynamicState(pipeline, is_indexed);
scheduler.BeginRendering(state);
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
const auto& fetch_shader = pipeline->GetFetchShader();
@@ -339,10 +249,11 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
return;
}
auto state = PrepareRenderState(pipeline->GetMrtMask());
PrepareRenderState(pipeline);
if (!BindResources(pipeline)) {
return;
}
const auto state = BeginRendering(pipeline);
buffer_cache.BindVertexBuffers(*pipeline);
if (is_indexed) {
@@ -358,8 +269,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false);
}
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline, is_indexed);
pipeline->BindResources(set_writes, buffer_barriers, push_data);
UpdateDynamicState(pipeline, is_indexed);
scheduler.BeginRendering(state);
// We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and
// instance offsets will be automatically applied by Vulkan from indirect args buffer.
@@ -411,6 +323,7 @@ void Rasterizer::DispatchDirect() {
}
scheduler.EndRendering();
pipeline->BindResources(set_writes, buffer_barriers, push_data);
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
@@ -434,10 +347,11 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
return;
}
scheduler.EndRendering();
const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
scheduler.EndRendering();
pipeline->BindResources(set_writes, buffer_barriers, push_data);
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
cmdbuf.dispatchIndirect(buffer->Handle(), base);
@@ -480,7 +394,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
// Bind resource buffers and textures.
Shader::Backend::Bindings binding{};
Shader::PushData push_data = MakeUserData(liverpool->regs);
push_data = MakeUserData(liverpool->regs);
for (const auto* stage : pipeline->GetStages()) {
if (!stage) {
continue;
@@ -505,8 +419,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
fault_process_pending |= uses_dma;
pipeline->BindResources(set_writes, buffer_barriers, push_data);
return true;
}
@@ -740,7 +652,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
} else {
if (auto& old_image = texture_cache.GetImage(image_id);
old_image.binding.needs_rebind) {
old_image.binding.Reset(); // clean up previous image binding state
old_image.binding = {};
image_id = texture_cache.FindImage(desc);
}
@@ -781,7 +693,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
image.usage.texture |= !is_storage;
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
image.last_state.layout);
image.backing->state.layout);
}
set_writes.push_back({
@@ -816,55 +728,78 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
}
}
void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
int cb_index = 0;
RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
attachment_feedback_loop = false;
for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) {
if (state.color_attachments[attach_idx].imageView == VK_NULL_HANDLE) {
const auto& regs = liverpool->regs;
const auto& key = pipeline->GetGraphicsKey();
RenderState state;
state.width = instance.GetMaxFramebufferWidth();
state.height = instance.GetMaxFramebufferHeight();
state.num_layers = std::numeric_limits<u32>::max();
state.num_color_attachments = std::bit_width(key.mrt_mask);
for (auto cb = 0u; cb < state.num_color_attachments; ++cb) {
auto& [image_id, desc] = cb_descs[cb];
if (!image_id) {
continue;
}
auto& [image_id, desc] = cb_descs[cb_index++];
if (auto& old_img = texture_cache.GetImage(image_id); old_img.binding.needs_rebind) {
auto& view = texture_cache.FindRenderTarget(desc);
ASSERT(view.image_id != image_id);
image_id = bound_images.emplace_back(view.image_id);
auto& image = texture_cache.GetImage(view.image_id);
state.color_attachments[attach_idx].imageView = *view.image_view;
state.color_attachments[attach_idx].imageLayout = image.last_state.layout;
const auto mip = view.info.range.base.level;
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
auto* image = &texture_cache.GetImage(image_id);
if (image->binding.needs_rebind) {
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
image = &texture_cache.GetImage(image_id);
}
auto& image = texture_cache.GetImage(image_id);
if (image.binding.is_bound) {
ASSERT_MSG(!image.binding.force_general,
texture_cache.UpdateImage(image_id);
image->SetBackingSamples(key.color_samples[cb]);
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
const auto slice = image_view.info.range.base.layer;
const auto mip = image_view.info.range.base.level;
const auto& col_buf = regs.color_buffers[cb];
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice);
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
if (image->binding.is_bound) {
ASSERT_MSG(!image->binding.force_general,
"Having image both as storage and render target is unsupported");
image.Transit(instance.IsAttachmentFeedbackLoopLayoutSupported()
? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
: vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eColorAttachmentWrite, {});
image->Transit(instance.IsAttachmentFeedbackLoopLayoutSupported()
? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
: vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eColorAttachmentWrite, {});
attachment_feedback_loop = true;
} else {
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
vk::AccessFlagBits2::eColorAttachmentWrite |
vk::AccessFlagBits2::eColorAttachmentRead,
desc.view_info.range);
image->Transit(vk::ImageLayout::eColorAttachmentOptimal,
vk::AccessFlagBits2::eColorAttachmentWrite |
vk::AccessFlagBits2::eColorAttachmentRead,
desc.view_info.range);
}
image.usage.render_target = 1u;
state.color_attachments[attach_idx].imageLayout = image.last_state.layout;
state.width = std::min<u32>(state.width, std::max(image->info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image->info.size.height >> mip, 1u));
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
state.color_attachments[cb] = {
.imageView = *image_view.image_view,
.imageLayout = image->backing->state.layout,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
};
image->usage.render_target = 1u;
}
if (db_desc) {
const auto& image_id = std::get<0>(*db_desc);
const auto& desc = std::get<1>(*db_desc);
if (auto image_id = db_desc.first; image_id) {
auto& desc = db_desc.second;
const auto htile_address = regs.depth_htile_data_base.GetAddress();
const auto& image_view = texture_cache.FindDepthTarget(image_id, desc);
auto& image = texture_cache.GetImage(image_id);
ASSERT(image.binding.needs_rebind == 0);
const bool has_stencil = image.usage.stencil;
if (has_stencil) {
image.aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
const auto slice = image_view.info.range.base.layer;
const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
texture_cache.IsMetaCleared(htile_address, slice);
const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
texture_cache.TouchMeta(htile_address, slice, false);
ASSERT(desc.view_info.range.extent.levels == 1 && !image.binding.needs_rebind);
const bool has_stencil = image.info.props.has_stencil;
const auto new_layout = desc.view_info.is_storage
? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal
: vk::ImageLayout::eDepthAttachmentOptimal
@@ -874,13 +809,41 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
vk::AccessFlagBits2::eDepthStencilAttachmentRead,
desc.view_info.range);
state.depth_attachment.imageLayout = image.last_state.layout;
state.stencil_attachment.imageLayout = image.last_state.layout;
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.has_depth = regs.depth_buffer.DepthValid();
state.has_stencil = regs.depth_buffer.StencilValid();
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
if (state.has_depth) {
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = image.backing->state.layout,
.loadOp =
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
};
}
if (state.has_stencil) {
state.stencil_attachment = {
.imageView = *image_view.image_view,
.imageLayout = image.backing->state.layout,
.loadOp =
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
};
}
image.usage.depth_target = true;
image.usage.stencil = has_stencil;
}
scheduler.BeginRendering(state);
if (state.num_layers == std::numeric_limits<u32>::max()) {
state.num_layers = 1;
}
return state;
}
void Rasterizer::Resolve() {
@@ -904,66 +867,7 @@ void Rasterizer::Resolve() {
ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}",
liverpool->regs.color_buffers[0].Address(),
liverpool->regs.color_buffers[1].Address()));
mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
mrt0_range);
mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
mrt1_range);
if (mrt0_image.info.num_samples == 1) {
// Vulkan does not allow resolve from a single sample image, so change it to a copy.
// Note that resolving a single-sampled image doesn't really make sense, but a game might do
// it.
vk::ImageCopy region = {
.srcSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
};
scheduler.CommandBuffer().copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal,
mrt1_image.image, vk::ImageLayout::eTransferDstOptimal,
region);
} else {
vk::ImageResolve region = {
.srcSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
};
scheduler.CommandBuffer().resolveImage(
mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
vk::ImageLayout::eTransferDstOptimal, region);
}
mrt1_image.flags |= VideoCore::ImageFlagBits::GpuModified;
mrt1_image.flags &= ~VideoCore::ImageFlagBits::Dirty;
mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range);
ScopeMarkerEnd();
}
@@ -1020,9 +924,9 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
.dstOffset = {0, 0, 0},
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
};
scheduler.CommandBuffer().copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal,
write_image.image, vk::ImageLayout::eTransferDstOptimal,
region);
scheduler.CommandBuffer().copyImage(read_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal,
write_image.GetImage(),
vk::ImageLayout::eTransferDstOptimal, region);
ScopeMarkerEnd();
}
@@ -1090,18 +994,14 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
}
}
void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline, const bool is_indexed) const {
void Rasterizer::UpdateDynamicState(const GraphicsPipeline* pipeline, const bool is_indexed) const {
UpdateViewportScissorState();
UpdateDepthStencilState();
UpdatePrimitiveState(is_indexed);
UpdateRasterizationState();
UpdateColorBlendingState(pipeline);
auto& dynamic_state = scheduler.GetDynamicState();
dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop);
// Commit new dynamic state to the command buffer.
dynamic_state.Commit(instance, scheduler.CommandBuffer());
}
@@ -1320,6 +1220,14 @@ void Rasterizer::UpdateRasterizationState() const {
dynamic_state.SetLineWidth(regs.line_control.Width());
}
void Rasterizer::UpdateColorBlendingState(const GraphicsPipeline* pipeline) const {
const auto& regs = liverpool->regs;
auto& dynamic_state = scheduler.GetDynamicState();
dynamic_state.SetBlendConstants(regs.blend_constants);
dynamic_state.SetColorWriteMasks(pipeline->GetGraphicsKey().write_masks);
dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop);
}
void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) {
if ((from_guest && !Config::getVkGuestMarkersEnabled()) ||
(!from_guest && !Config::getVkHostMarkersEnabled())) {

View File

@@ -3,7 +3,6 @@
#pragma once
#include <shared_mutex>
#include "common/recursive_lock.h"
#include "common/shared_first_mutex.h"
#include "video_core/buffer_cache/buffer_cache.h"
@@ -84,29 +83,29 @@ public:
}
private:
RenderState PrepareRenderState(u32 mrt_mask);
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
void PrepareRenderState(const GraphicsPipeline* pipeline);
RenderState BeginRendering(const GraphicsPipeline* pipeline);
void Resolve();
void DepthStencilCopy(bool is_depth, bool is_stencil);
void EliminateFastClear();
void UpdateDynamicState(const GraphicsPipeline& pipeline, bool is_indexed) const;
void UpdateDynamicState(const GraphicsPipeline* pipeline, bool is_indexed) const;
void UpdateViewportScissorState() const;
void UpdateDepthStencilState() const;
void UpdatePrimitiveState(bool is_indexed) const;
void UpdateRasterizationState() const;
void UpdateColorBlendingState(const GraphicsPipeline* pipeline) const;
bool FilterDraw();
void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
Shader::PushData& push_data);
void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding);
bool BindResources(const Pipeline* pipeline);
void ResetBindings() {
for (auto& image_id : bound_images) {
texture_cache.GetImage(image_id).binding.Reset();
texture_cache.GetImage(image_id).binding = {};
}
bound_images.clear();
}
@@ -128,16 +127,17 @@ private:
Common::SharedFirstMutex mapped_ranges_mutex;
PipelineCache pipeline_cache;
boost::container::static_vector<
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
cb_descs;
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
using RenderTargetInfo =
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>;
std::array<RenderTargetInfo, Liverpool::NumColorBuffers> cb_descs;
std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, Shader::NumImages> image_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NumBuffers> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, Shader::NumImages> bound_images;
Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers;
Shader::PushData push_data;
using BufferBindingInfo = std::tuple<VideoCore::BufferId, AmdGpu::Buffer, u64>;
boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;

View File

@@ -4,7 +4,6 @@
#include <cstddef>
#include <optional>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"

View File

@@ -9,6 +9,7 @@
#include "video_core/host_shaders/color_to_ms_depth_frag.h"
#include "video_core/host_shaders/fs_tri_vert.h"
#include "video_core/host_shaders/ms_image_blit_frag.h"
namespace VideoCore {
@@ -35,19 +36,23 @@ BlitHelper::BlitHelper(const Vulkan::Instance& instance_, Vulkan::Scheduler& sch
CreatePipelineLayouts();
}
BlitHelper::~BlitHelper() = default;
void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
source.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
dest.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});
BlitHelper::~BlitHelper() {
const auto device = instance.GetDevice();
device.destroy(fs_tri_vertex);
device.destroy(color_to_ms_depth_frag);
device.destroy(src_msaa_copy_frag);
device.destroy(src_non_msaa_copy_frag);
}
void BlitHelper::ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples,
vk::Format src_pixel_format, vk::Format dst_pixel_format,
vk::Image source, vk::Image dest) {
const vk::ImageViewUsageCreateInfo color_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
const vk::ImageViewCreateInfo color_view_ci = {
.pNext = &color_usage_ci,
.image = source.image,
.image = source,
.viewType = vk::ImageViewType::e2D,
.format = source.info.pixel_format,
.format = src_pixel_format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0U,
@@ -64,9 +69,9 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment};
const vk::ImageViewCreateInfo depth_view_ci = {
.pNext = &depth_usage_ci,
.image = dest.image,
.image = dest,
.viewType = vk::ImageViewType::e2D,
.format = dest.info.pixel_format,
.format = dst_pixel_format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth,
.baseMipLevel = 0U,
@@ -86,8 +91,8 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
Vulkan::RenderState state{};
state.has_depth = true;
state.width = dest.info.size.width;
state.height = dest.info.size.height;
state.width = width;
state.height = height;
state.depth_attachment = vk::RenderingAttachmentInfo{
.imageView = depth_view,
.imageLayout = vk::ImageLayout::eDepthAttachmentOptimal,
@@ -114,9 +119,13 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
texture_write);
const DepthPipelineKey key{dest.info.num_samples, dest.info.pixel_format};
const vk::Pipeline depth_pipeline = GetDepthToMsPipeline(key);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_pipeline);
const MsPipelineKey key{num_samples, dst_pixel_format, false};
auto it = std::ranges::find(color_to_ms_depth_pl, key, &MsPipeline::first);
if (it == color_to_ms_depth_pl.end()) {
CreateColorToMSDepthPipeline(key);
it = --color_to_ms_depth_pl.end();
}
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second);
const vk::Viewport viewport = {
.x = 0,
@@ -136,24 +145,122 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
cmdbuf.draw(3, 1, 0, 0);
scheduler.EndRendering();
scheduler.GetDynamicState().Invalidate();
}
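
Both pipeline caches in this file use the same find-or-create pattern over a flat vector keyed by MsPipelineKey. A minimal sketch of that pattern, with illustrative names (FindOrCreate is not part of the codebase):

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

// Hedged sketch of the lookup used for color_to_ms_depth_pl / ms_image_copy_pl:
// a linear search is fine because only a handful of sample-count/format
// combinations ever exist.
template <typename Key, typename Handle, typename Create>
Handle& FindOrCreate(std::vector<std::pair<Key, Handle>>& cache, const Key& key,
                     Create&& create) {
    auto it = std::ranges::find(cache, key, &std::pair<Key, Handle>::first);
    if (it == cache.end()) {
        cache.emplace_back(key, create(key));
        it = std::prev(cache.end());
    }
    return it->second;
}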
vk::Pipeline BlitHelper::GetDepthToMsPipeline(const DepthPipelineKey& key) {
auto it = std::ranges::find(color_to_ms_depth_pl, key, &DepthPipeline::first);
if (it != color_to_ms_depth_pl.end()) {
return *it->second;
void BlitHelper::CopyBetweenMsImages(u32 width, u32 height, u32 num_samples,
vk::Format pixel_format, bool src_msaa, vk::Image source,
vk::Image dest) {
const vk::ImageViewUsageCreateInfo src_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
const vk::ImageViewCreateInfo src_view_ci = {
.pNext = &src_usage_ci,
.image = source,
.viewType = vk::ImageViewType::e2D,
.format = pixel_format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0U,
.levelCount = 1U,
.baseArrayLayer = 0U,
.layerCount = 1U,
},
};
const auto [src_view_result, src_view] = instance.GetDevice().createImageView(src_view_ci);
ASSERT_MSG(src_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
vk::to_string(src_view_result));
const vk::ImageViewUsageCreateInfo dst_usage_ci{.usage =
vk::ImageUsageFlagBits::eColorAttachment};
const vk::ImageViewCreateInfo dst_view_ci = {
.pNext = &dst_usage_ci,
.image = dest,
.viewType = vk::ImageViewType::e2D,
.format = pixel_format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0U,
.levelCount = 1U,
.baseArrayLayer = 0U,
.layerCount = 1U,
},
};
const auto [dst_view_result, dst_view] = instance.GetDevice().createImageView(dst_view_ci);
ASSERT_MSG(dst_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
vk::to_string(dst_view_result));
scheduler.DeferOperation([device = instance.GetDevice(), src_view, dst_view] {
device.destroyImageView(src_view);
device.destroyImageView(dst_view);
});
Vulkan::RenderState state{};
state.width = width;
state.height = height;
state.color_attachments[state.num_color_attachments++] = vk::RenderingAttachmentInfo{
.imageView = dst_view,
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
.loadOp = vk::AttachmentLoadOp::eDontCare,
.storeOp = vk::AttachmentStoreOp::eStore,
};
scheduler.BeginRendering(state);
const auto cmdbuf = scheduler.CommandBuffer();
const vk::DescriptorImageInfo image_info = {
.sampler = VK_NULL_HANDLE,
.imageView = src_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
};
const vk::WriteDescriptorSet texture_write = {
.dstSet = VK_NULL_HANDLE,
.dstBinding = 0U,
.dstArrayElement = 0U,
.descriptorCount = 1U,
.descriptorType = vk::DescriptorType::eSampledImage,
.pImageInfo = &image_info,
};
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
texture_write);
const MsPipelineKey key{num_samples, pixel_format, src_msaa};
auto it = std::ranges::find(ms_image_copy_pl, key, &MsPipeline::first);
if (it == ms_image_copy_pl.end()) {
CreateMsCopyPipeline(key);
it = --ms_image_copy_pl.end();
}
CreateColorToMSDepthPipeline(key);
return *color_to_ms_depth_pl.back().second;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second);
const vk::Viewport viewport = {
.x = 0,
.y = 0,
.width = float(state.width),
.height = float(state.height),
.minDepth = 0.f,
.maxDepth = 1.f,
};
cmdbuf.setViewportWithCount(viewport);
const vk::Rect2D scissor = {
.offset = {0, 0},
.extent = {state.width, state.height},
};
cmdbuf.setScissorWithCount(scissor);
cmdbuf.draw(3, 1, 0, 0);
scheduler.EndRendering();
scheduler.GetDynamicState().Invalidate();
}
void BlitHelper::CreateShaders() {
fs_tri_vertex = Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice());
color_to_ms_depth_frag =
Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice());
const auto device = instance.GetDevice();
fs_tri_vertex =
Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex, device);
color_to_ms_depth_frag = Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG,
vk::ShaderStageFlagBits::eFragment, device);
src_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG,
vk::ShaderStageFlagBits::eFragment, device, {"SRC_MSAA"});
src_non_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG,
vk::ShaderStageFlagBits::eFragment, device);
}
void BlitHelper::CreatePipelineLayouts() {
@@ -186,7 +293,7 @@ void BlitHelper::CreatePipelineLayouts() {
single_texture_pl_layout = std::move(pipeline_layout);
}
void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
void BlitHelper::CreateColorToMSDepthPipeline(const MsPipelineKey& key) {
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = vk::PrimitiveTopology::eTriangleList,
};
@@ -220,7 +327,7 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
.colorAttachmentCount = 0U,
.pColorAttachmentFormats = nullptr,
.depthAttachmentFormat = key.depth_format,
.depthAttachmentFormat = key.attachment_format,
.stencilAttachmentFormat = vk::Format::eUndefined,
};
@@ -253,4 +360,83 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
color_to_ms_depth_pl.emplace_back(key, std::move(pipeline));
}
void BlitHelper::CreateMsCopyPipeline(const MsPipelineKey& key) {
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = vk::PrimitiveTopology::eTriangleList,
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = ToSampleCount(key.num_samples),
};
const vk::PipelineDepthStencilStateCreateInfo depth_state = {
.depthTestEnable = false,
.depthWriteEnable = false,
.depthCompareOp = vk::CompareOp::eAlways,
};
const std::array dynamic_states = {vk::DynamicState::eViewportWithCount,
vk::DynamicState::eScissorWithCount};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
std::array<vk::PipelineShaderStageCreateInfo, 2> shader_stages;
shader_stages[0] = {
.stage = vk::ShaderStageFlagBits::eVertex,
.module = fs_tri_vertex,
.pName = "main",
};
shader_stages[1] = {
.stage = vk::ShaderStageFlagBits::eFragment,
.module = key.src_msaa ? src_msaa_copy_frag : src_non_msaa_copy_frag,
.pName = "main",
};
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
.colorAttachmentCount = 1u,
.pColorAttachmentFormats = &key.attachment_format,
.depthAttachmentFormat = vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined,
};
const vk::PipelineColorBlendAttachmentState attachment = {
.blendEnable = false,
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = false,
.logicOp = vk::LogicOp::eCopy,
.attachmentCount = 1u,
.pAttachments = &attachment,
};
const vk::PipelineViewportStateCreateInfo viewport_info{};
const vk::PipelineVertexInputStateCreateInfo vertex_input_info{};
const vk::PipelineRasterizationStateCreateInfo raster_state{.lineWidth = 1.f};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.pNext = &pipeline_rendering_ci,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_state,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = *single_texture_pl_layout,
};
auto [pipeline_result, pipeline] =
instance.GetDevice().createGraphicsPipelineUnique(VK_NULL_HANDLE, pipeline_info);
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
vk::to_string(pipeline_result));
Vulkan::SetObjectName(instance.GetDevice(), *pipeline, "Non MS Image to MS Image {}",
key.num_samples);
ms_image_copy_pl.emplace_back(key, std::move(pipeline));
}
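
Both helpers rely on FS_TRI_VERT generating one oversized triangle from the vertex index, which is why no vertex buffers are bound and a plain cmdbuf.draw(3, 1, 0, 0) suffices. A host-side sketch of the positions such a shader conventionally emits (an assumption about the shader's contents, shown here only for illustration):

#include <array>

// Clip-space corners of the classic fullscreen triangle: three vertices cover
// the whole viewport in a single draw, with the excess clipped away.
constexpr std::array<std::array<float, 2>, 3> kFsTriPositions{{
    {-1.0f, -1.0f},
    {3.0f, -1.0f},
    {-1.0f, 3.0f},
}};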
} // namespace VideoCore

View File

@@ -17,6 +17,7 @@ namespace VideoCore {
class Image;
class ImageView;
struct ImageInfo;
class BlitHelper {
static constexpr size_t MaxMsPipelines = 6;
@@ -25,20 +26,26 @@ public:
explicit BlitHelper(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
~BlitHelper();
void BlitColorToMsDepth(Image& source, Image& dest);
void ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples,
vk::Format src_pixel_format, vk::Format dst_pixel_format,
vk::Image source, vk::Image dest);
void CopyBetweenMsImages(u32 width, u32 height, u32 num_samples, vk::Format pixel_format,
bool src_msaa, vk::Image source, vk::Image dest);
private:
void CreateShaders();
void CreatePipelineLayouts();
struct DepthPipelineKey {
struct MsPipelineKey {
u32 num_samples;
vk::Format depth_format;
vk::Format attachment_format;
bool src_msaa;
auto operator<=>(const DepthPipelineKey&) const noexcept = default;
auto operator<=>(const MsPipelineKey&) const noexcept = default;
};
vk::Pipeline GetDepthToMsPipeline(const DepthPipelineKey& key);
void CreateColorToMSDepthPipeline(const DepthPipelineKey& key);
void CreateColorToMSDepthPipeline(const MsPipelineKey& key);
void CreateMsCopyPipeline(const MsPipelineKey& key);
private:
const Vulkan::Instance& instance;
@@ -47,9 +54,12 @@ private:
vk::UniquePipelineLayout single_texture_pl_layout;
vk::ShaderModule fs_tri_vertex;
vk::ShaderModule color_to_ms_depth_frag;
vk::ShaderModule src_msaa_copy_frag;
vk::ShaderModule src_non_msaa_copy_frag;
using DepthPipeline = std::pair<DepthPipelineKey, vk::UniquePipeline>;
std::vector<DepthPipeline> color_to_ms_depth_pl{};
using MsPipeline = std::pair<MsPipelineKey, vk::UniquePipeline>;
std::vector<MsPipeline> color_to_ms_depth_pl;
std::vector<MsPipeline> ms_image_copy_pl;
};
} // namespace VideoCore

View File

@@ -6,6 +6,7 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/blit_helper.h"
#include "video_core/texture_cache/image.h"
#include <vk_mem_alloc.h>
@@ -75,11 +76,6 @@ static vk::FormatFeatureFlags2 FormatFeatureFlags(const vk::ImageUsageFlags usag
return feature_flags;
}
UniqueImage::UniqueImage() {}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
UniqueImage::~UniqueImage() {
if (image) {
vmaDestroyImage(allocator, image, allocation);
@@ -87,9 +83,8 @@ UniqueImage::~UniqueImage() {
}
void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
if (image) {
vmaDestroyImage(allocator, image, allocation);
}
this->image_ci = image_ci;
ASSERT(!image);
const VmaAllocationCreateInfo alloc_info = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
@@ -109,9 +104,10 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
}
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
BlitHelper& blit_helper_, Common::SlotVector<ImageView>& slot_image_views_,
const ImageInfo& info_)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()} {
: instance{&instance_}, scheduler{&scheduler_}, blit_helper{&blit_helper_},
slot_image_views{&slot_image_views_}, info{info_} {
if (info.pixel_format == vk::Format::eUndefined) {
return;
}
@@ -130,20 +126,11 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
usage_flags = ImageUsageFlags(instance, info);
format_features = FormatFeatureFlags(usage_flags);
switch (info.pixel_format) {
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
case vk::Format::eX8D24UnormPack32:
if (info.props.is_depth) {
aspect_mask = vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eD32SfloatS8Uint:
aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
break;
default:
break;
if (info.props.has_stencil) {
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
}
constexpr auto tiling = vk::ImageTiling::eOptimal;
@@ -162,10 +149,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
vk::to_string(supported_format), vk::to_string(format_info.type),
vk::to_string(format_info.flags), vk::to_string(format_info.usage));
}
const auto supported_samples =
image_format_properties.result == vk::Result::eSuccess
? image_format_properties.value.imageFormatProperties.sampleCounts
: vk::SampleCountFlagBits::e1;
supported_samples = image_format_properties.result == vk::Result::eSuccess
? image_format_properties.value.imageFormatProperties.sampleCounts
: vk::SampleCountFlagBits::e1;
const vk::ImageCreateInfo image_ci = {
.flags = flags,
@@ -184,22 +170,48 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
.initialLayout = vk::ImageLayout::eUndefined,
};
image.Create(image_ci);
backing = &backing_images.emplace_back();
backing->num_samples = info.num_samples;
backing->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()};
backing->image.Create(image_ci);
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {} {:#x}:{:#x}",
info.size.width, info.size.height, info.size.depth,
AmdGpu::NameOf(info.tile_mode), info.guest_address, info.guest_size);
Vulkan::SetObjectName(instance->GetDevice(), GetImage(),
"Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{}", info.size.width,
info.size.height, info.size.depth, AmdGpu::NameOf(info.tile_mode),
vk::to_string(info.pixel_format), info.guest_address, info.guest_size,
info.resources.layers, info.resources.levels, info.num_samples);
}
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range) {
Image::~Image() = default;
ImageView& Image::FindView(const ImageViewInfo& view_info, bool ensure_guest_samples) {
if (ensure_guest_samples && (backing->num_samples > 1) != (info.num_samples > 1)) {
SetBackingSamples(info.num_samples);
}
const auto& view_infos = backing->image_view_infos;
const auto it = std::ranges::find(view_infos, view_info);
if (it != view_infos.end()) {
const auto view_id = backing->image_view_ids[std::distance(view_infos.begin(), it)];
return (*slot_image_views)[view_id];
}
const auto view_id = slot_image_views->insert(*instance, view_info, *this);
backing->image_view_infos.emplace_back(view_info);
backing->image_view_ids.emplace_back(view_id);
return (*slot_image_views)[view_id];
}
Image::Barriers Image::GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
vk::PipelineStageFlags2 dst_stage,
std::optional<SubresourceRange> subres_range) {
auto& last_state = backing->state;
auto& subresource_states = backing->subresource_states;
const bool needs_partial_transition =
subres_range &&
(subres_range->base != SubresourceBase{} || subres_range->extent != info.resources);
const bool partially_transited = !subresource_states.empty();
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> barriers{};
Barriers barriers;
if (needs_partial_transition || partially_transited) {
if (!partially_transited) {
subresource_states.resize(info.resources.levels * info.resources.layers);
@@ -238,7 +250,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
.newLayout = dst_layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.image = GetImage(),
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = mip,
@@ -271,7 +283,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
.newLayout = dst_layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.image = GetImage(),
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
@@ -289,7 +301,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
return barriers;
}
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
void Image::Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf /*= {}*/) {
// Adjust pipeline stage
const vk::PipelineStageFlags2 dst_pl_stage =
@@ -314,33 +326,91 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> d
});
}
void Image::Upload(vk::Buffer buffer, u64 offset) {
void Image::Upload(std::span<const vk::BufferImageCopy> upload_copies, vk::Buffer buffer,
u64 offset) {
SetBackingSamples(info.num_samples, false);
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
// Copy to the image.
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
? vk::ImageAspectFlagBits::eDepth
: aspect_mask;
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height,
.imageSubresource{
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {info.size.width, info.size.height, 1},
const vk::BufferMemoryBarrier2 pre_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
.buffer = buffer,
.offset = offset,
.size = info.guest_size,
};
const vk::BufferMemoryBarrier2 post_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer,
.offset = offset,
.size = info.guest_size,
};
const auto image_barriers =
GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
vk::PipelineStageFlagBits2::eCopy, {});
const auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
.pImageMemoryBarriers = image_barriers.data(),
});
cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal,
upload_copies);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
flags &= ~ImageFlagBits::Dirty;
}
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
void Image::Download(std::span<const vk::BufferImageCopy> download_copies, vk::Buffer buffer,
u64 offset, u64 download_size) {
SetBackingSamples(info.num_samples);
scheduler->EndRendering();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eCopy,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer,
.offset = offset,
.size = download_size,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eCopy,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer,
.offset = offset,
.size = download_size,
};
const auto image_barriers =
GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
vk::PipelineStageFlagBits2::eCopy, {});
auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
.pImageMemoryBarriers = image_barriers.data(),
});
cmdbuf.copyImageToBuffer(GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer,
download_copies);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
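
Upload and Download share the same bracketing discipline: a pre-barrier makes prior writes visible to the transfer, and a post-barrier publishes the transfer's results to later stages. A hypothetical helper capturing that shape (GuardedTransfer is illustrative, not part of the codebase; synchronization2 and the VULKAN_HPP_NO_CONSTRUCTORS designated-initializer style used throughout this code are assumed):

#include <vulkan/vulkan.hpp>

template <typename RecordCopy>
void GuardedTransfer(vk::CommandBuffer cmdbuf, vk::Buffer buffer, vk::DeviceSize offset,
                     vk::DeviceSize size, RecordCopy&& record_copy) {
    // Make all prior writes visible to the transfer stage; access is widened to
    // read|write so the same helper covers both upload and download directions.
    const vk::BufferMemoryBarrier2 pre{
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask =
            vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
        .buffer = buffer,
        .offset = offset,
        .size = size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &pre,
    });
    record_copy(cmdbuf); // the copyBufferToImage / copyImageToBuffer call
    // Publish the transfer's results to every later stage.
    const vk::BufferMemoryBarrier2 post{
        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
        .buffer = buffer,
        .offset = offset,
        .size = size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &post,
    });
}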
void Image::CopyImage(Image& src_image) {
@@ -353,6 +423,9 @@ void Image::CopyImage(Image& src_image) {
const u32 depth =
info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth;
SetBackingSamples(info.num_samples, false);
src_image.SetBackingSamples(src_info.num_samples);
boost::container::small_vector<vk::ImageCopy, 8> image_copies;
for (u32 mip = 0; mip < num_mips; ++mip) {
const auto mip_w = std::max(width >> mip, 1u);
@@ -381,8 +454,8 @@ void Image::CopyImage(Image& src_image) {
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
image_copies);
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
backing->state.layout, image_copies);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
@@ -393,6 +466,9 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1);
SetBackingSamples(info.num_samples, false);
src_image.SetBackingSamples(src_info.num_samples);
boost::container::small_vector<vk::BufferImageCopy, 8> buffer_copies;
for (u32 mip = 0; mip < num_mips; ++mip) {
const auto mip_w = std::max(src_info.size.width >> mip, 1u);
@@ -445,7 +521,7 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
.pBufferMemoryBarriers = &pre_copy_barrier,
});
cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer,
cmdbuf.copyImageToBuffer(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer,
buffer_copies);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
@@ -458,15 +534,11 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
copy.imageSubresource.aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil;
}
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, buffer_copies);
cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal,
buffer_copies);
}
void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
auto cmdbuf = scheduler->CommandBuffer();
void Image::CopyMip(Image& src_image, u32 mip, u32 slice) {
const auto mip_w = std::max(info.size.width >> mip, 1u);
const auto mip_h = std::max(info.size.height >> mip, 1u);
const auto mip_d = std::max(info.size.depth >> mip, 1u);
@@ -491,13 +563,166 @@ void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
},
.extent = {mip_w, mip_h, mip_d},
};
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
image_copy);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
SetBackingSamples(info.num_samples);
src_image.SetBackingSamples(src_info.num_samples);
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
const auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
backing->state.layout, image_copy);
}
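
CopyMip, CopyImage, and RefreshImage all compute per-level extents the same way: halve per mip level, never dropping below one texel. The recurring std::max(dim >> mip, 1u) clamp as a helper (hypothetical, not in the codebase; u32 is this project's alias for std::uint32_t):

#include <algorithm>

constexpr u32 MipExtent(u32 base, u32 level) {
    return std::max(base >> level, 1u);
}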
Image::~Image() = default;
void Image::Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range,
const VideoCore::SubresourceRange& mrt1_range) {
SetBackingSamples(1, false);
scheduler->EndRendering();
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
mrt0_range);
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, mrt1_range);
if (src_image.backing->num_samples == 1) {
const vk::ImageCopy region = {
.srcSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {info.size.width, info.size.height, 1},
};
scheduler->CommandBuffer().copyImage(src_image.GetImage(),
vk::ImageLayout::eTransferSrcOptimal, GetImage(),
vk::ImageLayout::eTransferDstOptimal, region);
} else {
const vk::ImageResolve region = {
.srcSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {info.size.width, info.size.height, 1},
};
scheduler->CommandBuffer().resolveImage(src_image.GetImage(),
vk::ImageLayout::eTransferSrcOptimal, GetImage(),
vk::ImageLayout::eTransferDstOptimal, region);
}
flags |= VideoCore::ImageFlagBits::GpuModified;
flags &= ~VideoCore::ImageFlagBits::Dirty;
}
void Image::Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range) {
const vk::ImageSubresourceRange vk_range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = range.base.level,
.levelCount = range.extent.levels,
.baseArrayLayer = range.base.layer,
.layerCount = range.extent.layers,
};
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
const auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.clearColorImage(GetImage(), vk::ImageLayout::eTransferDstOptimal, clear_value.color,
vk_range);
}
void Image::SetBackingSamples(u32 num_samples, bool copy_backing) {
if (!backing || backing->num_samples == num_samples) {
return;
}
ASSERT_MSG(!info.props.is_depth, "Swapping samples is only valid for color images");
BackingImage* new_backing;
auto it = std::ranges::find(backing_images, num_samples, &BackingImage::num_samples);
if (it == backing_images.end()) {
auto new_image_ci = backing->image.image_ci;
new_image_ci.samples = LiverpoolToVK::NumSamples(num_samples, supported_samples);
new_backing = &backing_images.emplace_back();
new_backing->num_samples = num_samples;
new_backing->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()};
new_backing->image.Create(new_image_ci);
Vulkan::SetObjectName(instance->GetDevice(), new_backing->image.image,
"Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{} (backing)",
info.size.width, info.size.height, info.size.depth,
AmdGpu::NameOf(info.tile_mode), vk::to_string(info.pixel_format),
info.guest_address, info.guest_size, info.resources.layers,
info.resources.levels, num_samples);
} else {
new_backing = std::addressof(*it);
}
if (copy_backing) {
scheduler->EndRendering();
ASSERT(info.resources.levels == 1 && info.resources.layers == 1);
// Transition current backing to shader read layout
auto barriers =
GetBarriers(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead,
vk::PipelineStageFlagBits2::eFragmentShader, std::nullopt);
// Transition dest backing to color attachment layout, discarding previous contents
constexpr auto dst_stage = vk::PipelineStageFlagBits2::eColorAttachmentOutput;
constexpr auto dst_access = vk::AccessFlagBits2::eColorAttachmentWrite;
constexpr auto dst_layout = vk::ImageLayout::eColorAttachmentOptimal;
barriers.push_back(vk::ImageMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eNone,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = dst_layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = new_backing->image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = info.resources.layers,
},
});
const auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
.pImageMemoryBarriers = barriers.data(),
});
// Copy between MS and non-MS backing images
blit_helper->CopyBetweenMsImages(
info.size.width, info.size.height, new_backing->num_samples, info.pixel_format,
backing->num_samples > 1, backing->image, new_backing->image);
// Update the layout tracker to the new backing's layout
new_backing->state.layout = dst_layout;
new_backing->state.access_mask = dst_access;
new_backing->state.pl_stage = dst_stage;
}
backing = new_backing;
}
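
SetBackingSamples keeps one BackingImage per sample count and reuses it on later swaps. A hedged, free-standing version of just the lookup step (FindBacking is a hypothetical name; u32 and Image::BackingImage are as declared in image.h):

#include <algorithm>
#include <deque>
#include <memory>

// std::deque never relocates existing elements on emplace_back, so the cached
// Image::backing pointer stays valid while new sample-count variants are added.
Image::BackingImage* FindBacking(std::deque<Image::BackingImage>& backing_images,
                                 u32 num_samples) {
    const auto it = std::ranges::find(backing_images, num_samples,
                                      &Image::BackingImage::num_samples);
    return it != backing_images.end() ? std::addressof(*it) : nullptr;
}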
} // namespace VideoCore

View File

@@ -9,6 +9,7 @@
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view.h"
#include <deque>
#include <optional>
namespace Vulkan {
@@ -34,8 +35,9 @@ enum ImageFlagBits : u32 {
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct UniqueImage {
explicit UniqueImage();
explicit UniqueImage(vk::Device device, VmaAllocator allocator);
explicit UniqueImage() = default;
explicit UniqueImage(vk::Device device, VmaAllocator allocator)
: device{device}, allocator{allocator} {}
~UniqueImage();
UniqueImage(const UniqueImage&) = delete;
@@ -44,11 +46,12 @@ struct UniqueImage {
UniqueImage(UniqueImage&& other)
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
image{std::exchange(other.image, VK_NULL_HANDLE)} {}
image{std::exchange(other.image, VK_NULL_HANDLE)}, image_ci{std::move(other.image_ci)} {}
UniqueImage& operator=(UniqueImage&& other) {
image = std::exchange(other.image, VK_NULL_HANDLE);
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
image_ci = std::move(other.image_ci);
return *this;
}
@@ -58,17 +61,25 @@ struct UniqueImage {
return image;
}
private:
vk::Device device;
VmaAllocator allocator;
VmaAllocation allocation;
operator bool() const {
return image;
}
public:
vk::Device device{};
VmaAllocator allocator{};
VmaAllocation allocation{};
vk::Image image{};
vk::ImageCreateInfo image_ci{};
};
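
For reference, the VMA calls UniqueImage wraps reduce to one create/destroy pair per backing image. A minimal sketch under the same allocation flags (error handling elided; CreateBackingImage is illustrative only):

#include <vk_mem_alloc.h>

// Prefer device-local memory and stay within the memory budget, matching the
// VmaAllocationCreateInfo used by UniqueImage::Create.
void CreateBackingImage(VmaAllocator allocator, const VkImageCreateInfo& image_ci,
                        VkImage& image, VmaAllocation& allocation) {
    VmaAllocationCreateInfo alloc_info{};
    alloc_info.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
    alloc_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
    vmaCreateImage(allocator, &image_ci, &alloc_info, &image, &allocation, nullptr);
}
// ...and on destruction: vmaDestroyImage(allocator, image, allocation);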
constexpr Common::SlotId NULL_IMAGE_ID{0};
class BlitHelper;
struct Image {
Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info);
Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, BlitHelper& blit_helper,
Common::SlotVector<ImageView>& slot_image_views, const ImageInfo& info);
~Image();
Image(const Image&) = delete;
@@ -77,94 +88,100 @@ struct Image {
Image(Image&&) = default;
Image& operator=(Image&&) = default;
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
const auto image_addr = info.guest_address;
const auto image_end = info.guest_address + info.guest_size;
return image_addr < overlap_end && overlap_cpu_addr < image_end;
}
ImageViewId FindView(const ImageViewInfo& info) const {
const auto it = std::ranges::find(image_view_infos, info);
if (it == image_view_infos.end()) {
return {};
}
return image_view_ids[std::distance(image_view_infos.begin(), it)];
vk::Image GetImage() const {
return backing->image.image;
}
void AssociateDepth(ImageId image_id) {
depth_id = image_id;
}
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
void Upload(vk::Buffer buffer, u64 offset);
void CopyImage(Image& src_image);
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
void CopyMip(const Image& src_image, u32 mip, u32 slice);
bool IsTracked() {
return track_addr != 0 && track_addr_end != 0;
}
bool SafeToDownload() const {
return True(flags & ImageFlagBits::GpuModified) &&
False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty));
return True(flags & ImageFlagBits::GpuModified) && False(flags & (ImageFlagBits::Dirty));
}
void AssociateDepth(ImageId image_id) {
depth_id = image_id;
}
ImageView& FindView(const ImageViewInfo& view_info, bool ensure_guest_samples = true);
using Barriers = boost::container::small_vector<vk::ImageMemoryBarrier2, 32>;
Barriers GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
vk::PipelineStageFlags2 dst_stage,
std::optional<SubresourceRange> subres_range);
void Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
void Upload(std::span<const vk::BufferImageCopy> upload_copies, vk::Buffer buffer, u64 offset);
void Download(std::span<const vk::BufferImageCopy> download_copies, vk::Buffer buffer,
u64 offset, u64 download_size);
void CopyImage(Image& src_image);
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
void CopyMip(Image& src_image, u32 mip, u32 slice);
void Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range,
const VideoCore::SubresourceRange& mrt1_range);
void Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range);
void SetBackingSamples(u32 num_samples, bool copy_backing = true);
public:
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
BlitHelper* blit_helper;
Common::SlotVector<ImageView>* slot_image_views;
ImageInfo info;
UniqueImage image;
vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor;
vk::SampleCountFlags supported_samples = vk::SampleCountFlagBits::e1;
ImageFlagBits flags = ImageFlagBits::Dirty;
VAddr track_addr = 0;
VAddr track_addr_end = 0;
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
ImageId depth_id{};
u64 lru_id{};
// Resource state tracking
vk::ImageUsageFlags usage_flags;
vk::FormatFeatureFlags2 format_features;
struct State {
vk::PipelineStageFlags2 pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
vk::AccessFlags2 access_mask = vk::AccessFlagBits2::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
};
struct BackingImage {
UniqueImage image;
State state;
std::vector<State> subresource_states;
boost::container::small_vector<ImageViewInfo, 4> image_view_infos;
boost::container::small_vector<ImageViewId, 4> image_view_ids;
u32 num_samples;
};
std::deque<BackingImage> backing_images;
BackingImage* backing{};
boost::container::static_vector<u64, 16> mip_hashes{};
u64 lru_id{};
u64 tick_accessed_last{};
u64 hash{};
struct {
u32 texture : 1;
u32 storage : 1;
u32 render_target : 1;
u32 depth_target : 1;
u32 stencil : 1;
u32 vo_surface : 1;
} usage{};
vk::ImageUsageFlags usage_flags;
vk::FormatFeatureFlags2 format_features;
struct State {
vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
vk::Flags<vk::AccessFlagBits2> access_mask = vk::AccessFlagBits2::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
};
State last_state{};
std::vector<State> subresource_states{};
boost::container::small_vector<u64, 14> mip_hashes{};
u64 tick_accessed_last{0};
u64 hash{0};
struct {
union {
struct {
u32 is_bound : 1; // the image is bound to a descriptor set
u32 is_target : 1; // the image is bound as color/depth target
u32 needs_rebind : 1; // the image needs to be rebound
u32 force_general : 1; // the image needs to be used in general layout
};
u32 raw{};
};
void Reset() {
raw = 0u;
}
u32 is_bound : 1;
u32 is_target : 1;
u32 needs_rebind : 1;
u32 force_general : 1;
} binding{};
};

View File

@@ -90,9 +90,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D;
}
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
ImageId image_id_)
: image_id{image_id_}, info{info_} {
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_,
const Image& image)
: info{info_} {
vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage_flags};
if (!info.is_storage) {
usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage;
@@ -113,7 +113,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = &usage_ci,
.image = image.image,
.image = image.GetImage(),
.viewType = ConvertImageViewType(info.type),
.format = instance.GetSupportedFormat(format, image.format_features),
.components = info.mapping,

View File

@@ -35,8 +35,7 @@ struct ImageViewInfo {
struct Image;
struct ImageView {
ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
ImageId image_id);
ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, const Image& image);
~ImageView();
ImageView(const ImageView&) = delete;
@@ -45,7 +44,6 @@ struct ImageView {
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
ImageId image_id;
ImageViewInfo info;
vk::UniqueImageView image_view;
};

View File

@@ -73,16 +73,15 @@ ImageId TextureCache::GetNullImage(const vk::Format format) {
info.num_bits = 32;
info.UpdateSize();
const ImageId null_id = slot_images.insert(instance, scheduler, info);
auto& img = slot_images[null_id];
const vk::Image& null_image = img.image;
Vulkan::SetObjectName(instance.GetDevice(), null_image,
const ImageId null_id =
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
auto& image = slot_images[null_id];
Vulkan::SetObjectName(instance.GetDevice(), image.GetImage(),
fmt::format("Null Image ({})", vk::to_string(format)));
img.flags = ImageFlagBits::Empty;
img.track_addr = img.info.guest_address;
img.track_addr_end = img.info.guest_address + img.info.guest_size;
image.flags = ImageFlagBits::Empty;
image.track_addr = image.info.guest_address;
image.track_addr_end = image.info.guest_address + image.info.guest_size;
null_images.emplace(format, null_id);
return null_id;
@@ -124,7 +123,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) {
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal,
cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal,
download_buffer.Handle(), image_download);
{
@@ -269,7 +268,8 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
if (recreate) {
auto new_info = requested_info;
new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
const auto new_image_id =
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, new_info);
RegisterImage(new_image_id);
// Inherit image usage
@@ -290,7 +290,14 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
} else if (cache_image.info.num_samples == 1 && new_info.props.is_depth &&
new_info.num_samples > 1) {
// Perform a rendering pass to transfer the channels of source as samples in dest.
blit_helper.BlitColorToMsDepth(cache_image, new_image);
cache_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal,
vk::AccessFlagBits2::eShaderRead, {});
new_image.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});
blit_helper.ReinterpretColorAsMsDepth(
new_info.size.width, new_info.size.height, new_info.num_samples,
cache_image.info.pixel_format, new_info.pixel_format, cache_image.GetImage(),
new_image.GetImage());
} else {
LOG_WARNING(Render_Vulkan, "Unimplemented depth overlap copy");
}
@@ -308,15 +315,16 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
BindingType binding,
ImageId cache_image_id,
ImageId merged_image_id) {
auto& tex_cache_image = slot_images[cache_image_id];
// We can assume it is safe to delete the image if it wasn't accessed in some number of frames.
auto& cache_image = slot_images[cache_image_id];
const bool safe_to_delete =
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
scheduler.CurrentTick() - cache_image.tick_accessed_last > NumFramesBeforeRemoval;
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
image_info.num_bits * image_info.num_samples !=
tex_cache_image.info.num_bits * tex_cache_image.info.num_samples) {
// Equal address
if (image_info.guest_address == cache_image.info.guest_address) {
const u32 lhs_block_size = image_info.num_bits * image_info.num_samples;
const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples;
if (image_info.BlockDim() != cache_image.info.BlockDim() ||
lhs_block_size != rhs_block_size) {
// Very likely this kind of overlap is caused by allocation from a pool.
if (safe_to_delete) {
FreeImage(cache_image_id);
@@ -329,19 +337,19 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
// Compressed view of uncompressed image with same block size.
if (image_info.props.is_block && !tex_cache_image.info.props.is_block) {
if (image_info.props.is_block && !cache_image.info.props.is_block) {
return {ExpandImage(image_info, cache_image_id), -1, -1};
}
if (image_info.guest_size == tex_cache_image.info.guest_size &&
if (image_info.guest_size == cache_image.info.guest_size &&
(image_info.type == AmdGpu::ImageType::Color3D ||
tex_cache_image.info.type == AmdGpu::ImageType::Color3D)) {
cache_image.info.type == AmdGpu::ImageType::Color3D)) {
return {ExpandImage(image_info, cache_image_id), -1, -1};
}
// Size and resources are less than or equal, use image view.
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size <= tex_cache_image.info.guest_size) {
if (image_info.pixel_format != cache_image.info.pixel_format ||
image_info.guest_size <= cache_image.info.guest_size) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id];
const bool is_compatible =
@@ -350,14 +358,14 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
// Size and resources are greater, expand the image.
if (image_info.type == tex_cache_image.info.type &&
image_info.resources > tex_cache_image.info.resources) {
if (image_info.type == cache_image.info.type &&
image_info.resources > cache_image.info.resources) {
return {ExpandImage(image_info, cache_image_id), -1, -1};
}
// Size is greater but resources are not, because the tiling mode is different.
// Likely the address is reused for an image with a different tiling mode.
if (image_info.tile_mode != tex_cache_image.info.tile_mode) {
if (image_info.tile_mode != cache_image.info.tile_mode) {
if (safe_to_delete) {
FreeImage(cache_image_id);
}
@@ -368,9 +376,9 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
// Right overlap, the image requested is a possible subresource of the image from cache.
if (image_info.guest_address > tex_cache_image.info.guest_address) {
if (auto mip = image_info.MipOf(tex_cache_image.info); mip >= 0) {
if (auto slice = image_info.SliceOf(tex_cache_image.info, mip); slice >= 0) {
if (image_info.guest_address > cache_image.info.guest_address) {
if (auto mip = image_info.MipOf(cache_image.info); mip >= 0) {
if (auto slice = image_info.SliceOf(cache_image.info, mip); slice >= 0) {
return {cache_image_id, mip, slice};
}
}
@@ -383,12 +391,12 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
return {{}, -1, -1};
} else {
// Left overlap, the image from cache is a possible subresource of the image requested
if (auto mip = tex_cache_image.info.MipOf(image_info); mip >= 0) {
if (auto slice = tex_cache_image.info.SliceOf(image_info, mip); slice >= 0) {
if (auto mip = cache_image.info.MipOf(image_info); mip >= 0) {
if (auto slice = cache_image.info.SliceOf(image_info, mip); slice >= 0) {
// We have a larger image and a separate one representing a subresource of it
// bound as a render target. In this case we need to rebind the render target.
if (tex_cache_image.binding.is_target) {
tex_cache_image.binding.needs_rebind = 1u;
if (cache_image.binding.is_target) {
cache_image.binding.needs_rebind = 1u;
if (merged_image_id) {
GetImage(merged_image_id).binding.is_target = 1u;
}
@@ -399,15 +407,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
// We need to have a larger, already allocated image to copy this one into
if (merged_image_id) {
tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
vk::AccessFlagBits2::eTransferRead, {});
const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
ASSERT(num_mips_to_copy == 1);
auto& merged_image = slot_images[merged_image_id];
merged_image.CopyMip(tex_cache_image, mip, slice);
merged_image.CopyMip(cache_image, mip, slice);
FreeImage(cache_image_id);
}
}
@@ -418,7 +419,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
const auto new_image_id = slot_images.insert(instance, scheduler, info);
const auto new_image_id =
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
RegisterImage(new_image_id);
auto& src_image = slot_images[image_id];
@@ -507,7 +509,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
}
// Create and register a new image
if (!image_id) {
image_id = slot_images.insert(instance, scheduler, info);
image_id = slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
RegisterImage(image_id);
}
@@ -557,18 +559,6 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure
return {};
}
ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo& view_info) {
Image& image = slot_images[image_id];
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
return slot_image_views[view_id];
}
const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, image_id);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
}
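
With RegisterImageView gone, view lookup lives on the image itself; the replacement call pattern, as used by the functions below (arguments as in this diff):

// Sampled textures correct the backing sample count before view creation;
// render/depth targets pass false so the current backing samples are kept.
ImageView& tex_view = image.FindView(desc.view_info);
ImageView& rt_view = image.FindView(desc.view_info, false);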
ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
Image& image = slot_images[image_id];
if (desc.type == BindingType::Storage) {
@@ -579,11 +569,10 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
}
}
UpdateImage(image_id);
return RegisterImageView(image_id, desc.view_info);
return image.FindView(desc.view_info);
}
ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
const ImageId image_id = FindImage(desc);
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) {
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
image.usage.render_target = 1u;
@@ -602,15 +591,13 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
}
return RegisterImageView(image_id, desc.view_info);
return image.FindView(desc.view_info, false);
}
ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
const ImageId image_id = FindImage(desc);
ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) {
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
image.usage.depth_target = 1u;
image.usage.stencil = image.info.props.has_stencil;
UpdateImage(image_id);
// Register metadata for this depth buffer
@@ -635,7 +622,8 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
info.guest_address = desc.info.stencil_addr;
info.guest_size = desc.info.stencil_size;
info.size = desc.info.size;
stencil_id = slot_images.insert(instance, scheduler, info);
stencil_id =
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
RegisterImage(stencil_id);
}
Image& image = slot_images[stencil_id];
@@ -643,10 +631,10 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
image.AssociateDepth(image_id);
}
return RegisterImageView(image_id, desc.view_info);
return image.FindView(desc.view_info, false);
}
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
void TextureCache::RefreshImage(Image& image) {
if (False(image.flags & ImageFlagBits::Dirty) || image.info.num_samples > 1) {
return;
}
@@ -678,7 +666,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
boost::container::small_vector<vk::BufferImageCopy, 14> image_copies;
for (u32 m = 0; m < num_mips; m++) {
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
@@ -698,7 +686,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
const u32 extent_width = mip_pitch ? std::min(mip_pitch, width) : width;
const u32 extent_height = mip_height ? std::min(mip_height, height) : height;
image_copy.push_back({
image_copies.push_back({
.bufferOffset = mip_offset,
.bufferRowLength = mip_pitch,
.bufferImageHeight = mip_height,
@@ -713,21 +701,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
});
}
if (image_copy.empty()) {
if (image_copies.empty()) {
image.flags &= ~ImageFlagBits::Dirty;
return;
}
auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
sched_ptr->EndRendering();
scheduler.EndRendering();
const VAddr image_addr = image.info.guest_address;
const size_t image_size = image.info.guest_size;
const auto [in_buffer, in_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size);
const auto [in_buffer, in_offset] =
buffer_cache.ObtainBufferForImage(image.info.guest_address, image.info.guest_size);
if (auto barrier = in_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
vk::PipelineStageFlagBits2::eTransfer)) {
const auto cmdbuf = sched_ptr->CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
scheduler.CommandBuffer().pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &barrier.value(),
@@ -735,48 +720,12 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
}
const auto [buffer, offset] =
!custom_scheduler ? tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info)
: std::make_pair(in_buffer->Handle(), in_offset);
for (auto& copy : image_copy) {
tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info);
for (auto& copy : image_copies) {
copy.bufferOffset += offset;
}
const vk::BufferMemoryBarrier2 pre_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
.buffer = buffer,
.offset = offset,
.size = image_size,
};
const vk::BufferMemoryBarrier2 post_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer,
.offset = offset,
.size = image_size,
};
const auto image_barriers =
image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
vk::PipelineStageFlagBits2::eTransfer, {});
const auto cmdbuf = sched_ptr->CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
.pImageMemoryBarriers = image_barriers.data(),
});
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
image.flags &= ~ImageFlagBits::Dirty;
image.Upload(image_copies, buffer, offset);
}
vk::Sampler TextureCache::GetSampler(
@@ -1020,8 +969,10 @@ void TextureCache::DeleteImage(ImageId image_id) {
// Reclaim image and any image views it references.
scheduler.DeferOperation([this, image_id] {
Image& image = slot_images[image_id];
for (const ImageViewId image_view_id : image.image_view_ids) {
slot_image_views.erase(image_view_id);
for (auto& backing : image.backing_images) {
for (const ImageViewId image_view_id : backing.image_view_ids) {
slot_image_views.erase(image_view_id);
}
}
slot_images.erase(image_id);
});
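The loop above now walks a per-backing list of views instead of a single image.image_view_ids vector, matching the commit's backing-sample swapping: one cached Image can own several Vulkan images with different sample counts. The declarations are not in this excerpt; a hypothetical shape (all field names are guesses) could be:

    // Hypothetical layout implied by the DeleteImage loop; not the commit's
    // exact declarations.
    struct BackingImage {
        vk::Image image;                         // one VkImage per sample-count variant
        VmaAllocation allocation;
        u32 num_samples;                         // backing sample count of this variant
        std::vector<ImageViewId> image_view_ids; // views created against this backing
    };

    // Inside VideoCore::Image:
    //     std::vector<BackingImage> backing_images;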

src/video_core/texture_cache/texture_cache.h

@@ -67,12 +67,14 @@ public:
};
struct RenderTargetDesc : public BaseDesc {
RenderTargetDesc() = default;
RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
: BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {}
};
struct DepthTargetDesc : public BaseDesc {
DepthTargetDesc() = default;
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::DepthView& view,
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
@@ -118,20 +120,21 @@ public:
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc);
/// Retrieves the render target with specified properties
[[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc);
[[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc);
/// Retrieves the depth target with specified properties
[[nodiscard]] ImageView& FindDepthTarget(BaseDesc& desc);
[[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc);
/// Updates image contents if it was modified by CPU.
void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
void UpdateImage(ImageId image_id) {
std::scoped_lock lock{mutex};
Image& image = slot_images[image_id];
TrackImage(image_id);
TouchImage(image);
RefreshImage(image, custom_scheduler);
RefreshImage(image);
}
/// Resolves overlap between existing cache image and pending merged image
[[nodiscard]] std::tuple<ImageId, int, int> ResolveOverlap(const ImageInfo& info,
BindingType binding,
ImageId cache_img_id,
@@ -145,7 +148,7 @@ public:
[[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id);
/// Reuploads image contents.
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
void RefreshImage(Image& image);
/// Retrieves the sampler that matches the provided S# descriptor.
[[nodiscard]] vk::Sampler GetSampler(
@@ -161,16 +164,9 @@ public:
/// Retrieves the image view with the specified id.
[[nodiscard]] ImageView& GetImageView(ImageId id) {
auto& view = slot_image_views[id];
// Maybe this is not needed.
Image& image = slot_images[view.image_id];
TouchImage(image);
return view;
return slot_image_views[id];
}
/// Registers an image view for provided image
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
/// Returns true if the specified address is a metadata surface.
bool IsMeta(VAddr address) const {
return surface_metas.contains(address);

src/video_core/texture_cache/tile_manager.cpp

@@ -5,6 +5,7 @@
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/tile_manager.h"
@@ -190,6 +191,8 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info)
vmaDestroyBuffer(instance.GetAllocator(), out_buffer, out_allocation);
});
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, false));
@@ -238,15 +241,14 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info)
return {out_buffer, 0};
}
void TileManager::TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> buffer_copies,
vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info) {
void TileManager::TileImage(Image& in_image, std::span<vk::BufferImageCopy> buffer_copies,
vk::Buffer out_buffer, u32 out_offset, u32 copy_size) {
const auto& info = in_image.info;
if (!info.props.is_tiled) {
for (auto& copy : buffer_copies) {
copy.bufferOffset += out_offset;
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, out_buffer,
buffer_copies);
in_image.Download(buffer_copies, out_buffer, out_offset, copy_size);
return;
}
@@ -275,8 +277,8 @@ void TileManager::TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> buffer_copies, vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info)
});
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, temp_buffer,
buffer_copies);
in_image.Download(buffer_copies, temp_buffer, 0, copy_size);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, true));
const vk::DescriptorBufferInfo tiled_buffer_info{
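Both TileImage paths now end in in_image.Download(...), the read-direction twin of Upload. Note the untiled path above still applies out_offset to each copy's bufferOffset itself, so Download presumably uses offset/copy_size only to scope its barriers. Again a minimal sketch under the same assumptions (Transit helper, scheduler pointer), not the commit's image.cpp:

    // Minimal sketch only; the real Image::Download may differ.
    void Image::Download(std::span<vk::BufferImageCopy> copies, vk::Buffer buffer,
                         u64 offset, u32 copy_size) {
        scheduler->EndRendering();
        Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
        const auto cmdbuf = scheduler->CommandBuffer();
        cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, copies);
        // Make the downloaded range visible to later readers, e.g. the tiling
        // compute pass bound right after this call returns.
        const vk::BufferMemoryBarrier2 post_barrier{
            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
            .buffer = buffer,
            .offset = offset,
            .size = copy_size,
        };
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &post_barrier,
        });
    }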

src/video_core/texture_cache/tile_manager.h

@@ -10,6 +10,7 @@
namespace VideoCore {
struct ImageInfo;
struct Image;
class StreamBuffer;
class TileManager {
@@ -23,8 +24,8 @@ public:
StreamBuffer& stream_buffer);
~TileManager();
void TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> buffer_copies,
vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info);
void TileImage(Image& in_image, std::span<vk::BufferImageCopy> buffer_copies,
vk::Buffer out_buffer, u32 out_offset, u32 copy_size);
Result DetileImage(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info);
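Passing Image& instead of a raw vk::Image lets TileImage defer layout transitions and the actual copy to the image itself, while copy_size replaces the ImageInfo parameter since that information now travels with the image. A hypothetical call site on the write-back path (staging_buffer/staging_offset are assumed names, not from the commit):

    // Flush a CPU-visible copy of a modified image back through the tiler.
    tile_manager.TileImage(image, copies, staging_buffer, staging_offset,
                           image.info.guest_size);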