mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-08 20:58:41 +00:00
Handle mixed samples attachments (V2) (#3667)
* video_core: Refactor render target bind to allow disabling MSAA
* video_core: Implement swapping of backing samples
* clang format
* video_core: Better implementation

  Instead of downgrading to 1 sample, always try to match depth samples. This avoids needing to copy the depth-stencil attachment, and copying multisampled stencil is not possible on some vendors.
* video_core: Small bugfixes
* image: Add null check
* vk_rasterizer: Swap backing samples on resolve dst
* vk_presenter: Reset backing samples before present
* video_core: Small refactor to make this implementation better
* reinterpret: Fix channel check for degamma

  Seems this was simpler than I thought; hardware doesn't apply degamma on the W channel regardless of swizzle.
* image: Add missing end rendering call
* blit_helper: Fix bug in old reinterpret path
* blit_helper: Remove unused layer vertex

  Should be used in the future if copying many layers is needed.
* vk_rasterizer: Apply suggestion
* vk_rasterizer: More bind refactor
* vk_instance: Re-enable extensions
This commit is contained in:
@@ -233,13 +233,8 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
}
|
||||
|
||||
if (!is_eop) {
|
||||
// Before processing the flip we need to ask GPU thread to flush command list as at this
|
||||
// point VO surface is ready to be presented, and we will need to have an actual state of
|
||||
// Vulkan image at the time of frame presentation.
|
||||
liverpool->SendCommand([=, this]() {
|
||||
presenter->FlushDraw();
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
});
|
||||
// Non EOP flips can arrive from any thread so ask GPU thread to perform them
|
||||
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
|
||||
} else {
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
}
|
||||
@@ -247,15 +242,14 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
return true;
|
||||
}
|
||||
|
||||
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
|
||||
Vulkan::Frame* frame;
|
||||
if (index == -1) {
|
||||
frame = presenter->PrepareBlankFrame(is_eop);
|
||||
frame = presenter->PrepareBlankFrame(false);
|
||||
} else {
|
||||
const auto& buffer = port->buffer_slots[index];
|
||||
const auto& group = port->groups[buffer.group_index];
|
||||
frame = presenter->PrepareFrame(group, buffer.address_left, is_eop);
|
||||
frame = presenter->PrepareFrame(group, buffer.address_left);
|
||||
}
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
@@ -301,7 +301,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
|
||||
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
||||
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
||||
}
|
||||
if (runtime_info.fs_info.addr_flags.linear_sample_ena ||
|
||||
if (info.loads.Get(IR::Attribute::SampleIndex) ||
|
||||
runtime_info.fs_info.addr_flags.linear_sample_ena ||
|
||||
runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
||||
ctx.AddCapability(spv::Capability::SampleRateShading);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
|
||||
@@ -1022,7 +1022,7 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
|
||||
auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) {
|
||||
converted = ApplyForceDegamma(ir, texel, image.DstSelect());
|
||||
converted = ApplyForceDegamma(ir, texel);
|
||||
}
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
@@ -29,25 +29,15 @@ inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) {
|
||||
return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)};
|
||||
}
|
||||
|
||||
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value,
|
||||
const AmdGpu::CompMapping& mapping) {
|
||||
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value) {
|
||||
auto x = F32{ir.CompositeExtract(value, 0)};
|
||||
auto y = F32{ir.CompositeExtract(value, 1)};
|
||||
auto z = F32{ir.CompositeExtract(value, 2)};
|
||||
auto w = F32{ir.CompositeExtract(value, 3)};
|
||||
// Gamma correction is only applied to RGB components
|
||||
if (AmdGpu::IsRgb(mapping.r)) {
|
||||
x = ApplyGammaToLinear(ir, x);
|
||||
}
|
||||
if (AmdGpu::IsRgb(mapping.g)) {
|
||||
y = ApplyGammaToLinear(ir, y);
|
||||
}
|
||||
if (AmdGpu::IsRgb(mapping.b)) {
|
||||
z = ApplyGammaToLinear(ir, z);
|
||||
}
|
||||
if (AmdGpu::IsRgb(mapping.a)) {
|
||||
w = ApplyGammaToLinear(ir, w);
|
||||
}
|
||||
x = ApplyGammaToLinear(ir, x);
|
||||
y = ApplyGammaToLinear(ir, y);
|
||||
z = ApplyGammaToLinear(ir, z);
|
||||
return ir.CompositeConstruct(x, y, z, w);
|
||||
}
|
||||
|
||||
|
||||
@@ -1485,26 +1485,6 @@ struct Liverpool {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Returns the largest sample count reported by the considered color buffers and the
/// depth buffer. The result starts at 1 and is only ever raised, so it is never below 1.
u32 NumSamples() const {
|
||||
// It seems that the number of samples > 1 set in the AA config doesn't mean we're
|
||||
// always rendering with MSAA, so we need to derive MS ratio from the CB and DB
|
||||
// settings.
|
||||
u32 num_samples = 1u;
|
||||
// Color buffers are only considered when the color control mode is not Disable.
if (color_control.mode != ColorControl::OperationMode::Disable) {
|
||||
for (auto cb = 0u; cb < NumColorBuffers; ++cb) {
|
||||
const auto& col_buf = color_buffers[cb];
|
||||
// Skip slots that test false (presumably unbound color buffers -- confirm against
// the color buffer's bool conversion).
if (!col_buf) {
|
||||
continue;
|
||||
}
|
||||
num_samples = std::max(num_samples, col_buf.NumSamples());
|
||||
}
|
||||
}
|
||||
// The depth buffer contributes when either its depth or its stencil part is valid.
if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) {
|
||||
num_samples = std::max(num_samples, depth_buffer.NumSamples());
|
||||
}
|
||||
return num_samples;
|
||||
}
|
||||
|
||||
bool IsClipDisabled() const {
|
||||
return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList;
|
||||
}
|
||||
|
||||
@@ -984,44 +984,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
if (copy_size == 0) {
|
||||
return false;
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = buf_offset,
|
||||
.size = copy_size,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = buf_offset,
|
||||
.size = copy_size,
|
||||
};
|
||||
auto barriers =
|
||||
image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer, {});
|
||||
auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
|
||||
.pImageMemoryBarriers = barriers.data(),
|
||||
});
|
||||
auto& tile_manager = texture_cache.GetTileManager();
|
||||
tile_manager.TileImage(image.image, buffer_copies, buffer.Handle(), buf_offset, image.info);
|
||||
cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
tile_manager.TileImage(image, buffer_copies, buffer.Handle(), buf_offset, copy_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ set(SHADER_FILES
|
||||
detilers/micro_64bpp.comp
|
||||
detilers/micro_8bpp.comp
|
||||
color_to_ms_depth.frag
|
||||
ms_image_blit.frag
|
||||
fault_buffer_process.comp
|
||||
fs_tri.vert
|
||||
fsr.comp
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
|
||||
#version 450
|
||||
|
||||
#if defined(INSTANCE_AS_LAYER)
|
||||
#extension GL_ARB_shader_viewport_layer_array : require
|
||||
#endif
|
||||
|
||||
layout(location = 0) out vec2 uv;
|
||||
|
||||
void main() {
|
||||
@@ -11,5 +15,8 @@ void main() {
|
||||
float((gl_VertexIndex & 2u) << 1u)
|
||||
);
|
||||
gl_Position = vec4(pos - vec2(1.0, 1.0), 0.0, 1.0);
|
||||
#if defined(INSTANCE_AS_LAYER)
|
||||
gl_Layer = gl_InstanceIndex;
|
||||
#endif
|
||||
uv = pos * 0.5;
|
||||
}
|
||||
|
||||
23
src/video_core/host_shaders/ms_image_blit.frag
Normal file
23
src/video_core/host_shaders/ms_image_blit.frag
Normal file
@@ -0,0 +1,23 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#version 450 core
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
// Fragment shader that copies texels 1:1 from in_tex to out_color.
// The source is bound as a plain texture (no sampler) and read with texelFetch;
// defining SRC_MSAA switches the source to a multisampled texture type.
#if defined(SRC_MSAA)
|
||||
layout (binding = 0, set = 0) uniform texture2DMS in_tex;
|
||||
#else
|
||||
layout (binding = 0, set = 0) uniform texture2D in_tex;
|
||||
#endif
|
||||
|
||||
// Input at location 0; main() below does not read it and addresses texels through
// gl_FragCoord instead.
layout (location = 0) in vec2 uv;
|
||||
layout (location = 0) out vec4 out_color;
|
||||
|
||||
// Writes the source texel located at this fragment's integer pixel coordinate.
void main()
|
||||
{
|
||||
#if defined(SRC_MSAA)
|
||||
// Multisampled source: fetch the sample whose index is gl_SampleID. Per the GLSL
// specification, a static use of gl_SampleID makes the shader run once per sample.
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), gl_SampleID);
|
||||
#else
|
||||
// Single-sampled source: the last argument selects mip level 0.
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), 0);
|
||||
#endif
|
||||
}
|
||||
@@ -1,10 +1,10 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "fsr_pass.h"
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "video_core/host_shaders/fsr_comp.h"
|
||||
#include "video_core/renderer_vulkan/host_passes/fsr_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
@@ -164,6 +164,12 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
|
||||
CreateImages(img);
|
||||
}
|
||||
|
||||
if (Config::getVkHostMarkersEnabled()) {
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = "Host/FSR",
|
||||
});
|
||||
}
|
||||
|
||||
static const int thread_group_work_region_dim = 16;
|
||||
int dispatch_x = (width + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim;
|
||||
int dispatch_y = (height + (thread_group_work_region_dim - 1)) / thread_group_work_region_dim;
|
||||
@@ -381,6 +387,10 @@ vk::ImageView FsrPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
|
||||
.pImageMemoryBarriers = return_barrier.data(),
|
||||
});
|
||||
|
||||
if (Config::getVkHostMarkersEnabled()) {
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
}
|
||||
|
||||
return img.output_image_view.get();
|
||||
}
|
||||
|
||||
@@ -442,4 +452,4 @@ void FsrPass::CreateImages(Img& img) const {
|
||||
SetObjectName(device, img.output_image_view.get(), "FSR Output ImageView #{}", img.id);
|
||||
}
|
||||
|
||||
} // namespace Vulkan::HostPasses
|
||||
} // namespace Vulkan::HostPasses
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "pp_pass.h"
|
||||
#include "video_core/renderer_vulkan/host_passes/pp_pass.h"
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "video_core/host_shaders/fs_tri_vert.h"
|
||||
#include "video_core/host_shaders/post_process_frag.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
@@ -187,6 +188,17 @@ void PostProcessingPass::Create(vk::Device device, const vk::Format surface_form
|
||||
|
||||
void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
|
||||
vk::Extent2D input_size, Frame& frame, Settings settings) {
|
||||
if (Config::getVkHostMarkersEnabled()) {
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = "Host/Post processing",
|
||||
});
|
||||
}
|
||||
|
||||
constexpr vk::ImageSubresourceRange simple_subresource = {
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
};
|
||||
const std::array<vk::RenderingAttachmentInfo, 1> attachments{{
|
||||
{
|
||||
.imageView = frame.image_view,
|
||||
@@ -250,6 +262,26 @@ void PostProcessingPass::Render(vk::CommandBuffer cmdbuf, vk::ImageView input,
|
||||
cmdbuf.beginRendering(rendering_info);
|
||||
cmdbuf.draw(3, 1, 0, 0);
|
||||
cmdbuf.endRendering();
|
||||
|
||||
const auto post_barrier = vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.image = frame.image,
|
||||
.subresourceRange = simple_subresource,
|
||||
};
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.imageMemoryBarrierCount = 1,
|
||||
.pImageMemoryBarriers = &post_barrier,
|
||||
});
|
||||
|
||||
if (Config::getVkHostMarkersEnabled()) {
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan::HostPasses
|
||||
} // namespace Vulkan::HostPasses
|
||||
|
||||
@@ -135,8 +135,8 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
}
|
||||
|
||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||
.rasterizationSamples =
|
||||
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
|
||||
.rasterizationSamples = LiverpoolToVK::NumSamples(
|
||||
key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()),
|
||||
.sampleShadingEnable =
|
||||
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
|
||||
};
|
||||
@@ -259,7 +259,20 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
color_formats[i] = color_format;
|
||||
}
|
||||
|
||||
std::array<vk::SampleCountFlagBits, Liverpool::NumColorBuffers> color_samples;
|
||||
std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) {
|
||||
return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts())
|
||||
: vk::SampleCountFlagBits::e1;
|
||||
});
|
||||
const vk::AttachmentSampleCountInfoAMD mixed_samples = {
|
||||
.colorAttachmentCount = key.num_color_attachments,
|
||||
.pColorAttachmentSamples = color_samples.data(),
|
||||
.depthStencilAttachmentSamples =
|
||||
LiverpoolToVK::NumSamples(key.depth_samples, instance.GetDepthSampleCounts()),
|
||||
};
|
||||
|
||||
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
|
||||
.pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr,
|
||||
.colorAttachmentCount = key.num_color_attachments,
|
||||
.pColorAttachmentFormats = color_formats.data(),
|
||||
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid
|
||||
|
||||
@@ -41,7 +41,9 @@ struct GraphicsPipelineKey {
|
||||
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
||||
Liverpool::ColorBufferMask cb_shader_mask;
|
||||
Liverpool::ColorControl::LogicOp logic_op;
|
||||
u32 num_samples;
|
||||
u8 num_samples;
|
||||
u8 depth_samples;
|
||||
std::array<u8, Liverpool::NumColorBuffers> color_samples;
|
||||
u32 mrt_mask;
|
||||
struct {
|
||||
Liverpool::DepthBuffer::ZFormat z_format : 2;
|
||||
@@ -80,12 +82,8 @@ public:
|
||||
return fetch_shader;
|
||||
}
|
||||
|
||||
auto GetWriteMasks() const {
|
||||
return key.write_masks;
|
||||
}
|
||||
|
||||
u32 GetMrtMask() const {
|
||||
return key.mrt_mask;
|
||||
const GraphicsPipelineKey& GetGraphicsKey() const {
|
||||
return key;
|
||||
}
|
||||
|
||||
/// Gets the attributes and bindings for vertex inputs.
|
||||
|
||||
@@ -297,6 +297,8 @@ bool Instance::CreateDevice() {
|
||||
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
|
||||
amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
|
||||
nv_framebuffer_mixed_samples = add_extension(VK_NV_FRAMEBUFFER_MIXED_SAMPLES_EXTENSION_NAME);
|
||||
amd_mixed_attachment_samples = add_extension(VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME);
|
||||
shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
|
||||
if (shader_atomic_float2) {
|
||||
shader_atomic_float2_features =
|
||||
|
||||
@@ -239,6 +239,17 @@ public:
|
||||
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
|
||||
}
|
||||
|
||||
/// Returns true if VK_NV_framebuffer_mixed_samples or
|
||||
/// VK_AMD_mixed_attachment_samples is supported
|
||||
bool IsMixedDepthSamplesSupported() const {
|
||||
return nv_framebuffer_mixed_samples || amd_mixed_attachment_samples;
|
||||
}
|
||||
|
||||
/// Returns true if VK_AMD_mixed_attachment_samples is supported
|
||||
bool IsMixedAnySamplesSupported() const {
|
||||
return amd_mixed_attachment_samples;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
@@ -389,10 +400,14 @@ public:
|
||||
return properties.limits.maxFramebufferHeight;
|
||||
}
|
||||
|
||||
/// Returns the sample count flags supported by framebuffers.
|
||||
vk::SampleCountFlags GetFramebufferSampleCounts() const {
|
||||
return properties.limits.framebufferColorSampleCounts &
|
||||
properties.limits.framebufferDepthSampleCounts &
|
||||
/// Returns the sample count flags supported by color buffers.
|
||||
vk::SampleCountFlags GetColorSampleCounts() const {
|
||||
return properties.limits.framebufferColorSampleCounts;
|
||||
}
|
||||
|
||||
/// Returns the sample count flags usable for a depth-stencil buffer.
/// Only counts present in both the depth and the stencil framebuffer limits are kept
/// (bitwise AND of the two limit masks).
|
||||
vk::SampleCountFlags GetDepthSampleCounts() const {
|
||||
return properties.limits.framebufferDepthSampleCounts &
|
||||
properties.limits.framebufferStencilSampleCounts;
|
||||
}
|
||||
|
||||
@@ -481,6 +496,8 @@ private:
|
||||
bool image_load_store_lod{};
|
||||
bool amd_gcn_shader{};
|
||||
bool amd_shader_trinary_minmax{};
|
||||
bool nv_framebuffer_mixed_samples{};
|
||||
bool amd_mixed_attachment_samples{};
|
||||
bool shader_atomic_float2{};
|
||||
bool workgroup_memory_explicit_layout{};
|
||||
bool portability_subset{};
|
||||
|
||||
@@ -325,6 +325,8 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
const auto& regs = liverpool->regs;
|
||||
auto& key = graphics_key;
|
||||
|
||||
const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid();
|
||||
|
||||
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()
|
||||
: Liverpool::DepthBuffer::ZFormat::Invalid;
|
||||
key.stencil_format = regs.depth_buffer.StencilValid()
|
||||
@@ -339,17 +341,17 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.patch_control_points =
|
||||
regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0;
|
||||
key.logic_op = regs.color_control.rop3;
|
||||
key.num_samples = regs.NumSamples();
|
||||
key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1;
|
||||
key.num_samples = key.depth_samples;
|
||||
key.cb_shader_mask = regs.color_shader_mask;
|
||||
|
||||
const bool skip_cb_binding =
|
||||
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||
|
||||
// First pass to fill render target information
|
||||
// First pass to fill render target information needed by shader recompiler
|
||||
for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) {
|
||||
const auto& col_buf = regs.color_buffers[cb];
|
||||
const u32 target_mask = regs.color_target_mask.GetMask(cb);
|
||||
if (!col_buf || !target_mask) {
|
||||
if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
|
||||
// No attachment bound or writing to it is disabled.
|
||||
continue;
|
||||
}
|
||||
@@ -362,6 +364,26 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
.export_format = regs.color_export_format.GetFormat(cb),
|
||||
.swizzle = col_buf.Swizzle(),
|
||||
};
|
||||
}
|
||||
|
||||
// Compile and bind shader stages
|
||||
if (!RefreshGraphicsStages()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Second pass to mask out render targets not written by shader and fill remaining info
|
||||
u8 color_samples = 0;
|
||||
bool all_color_samples_same = true;
|
||||
for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) {
|
||||
const auto& col_buf = regs.color_buffers[cb];
|
||||
const u32 target_mask = regs.color_target_mask.GetMask(cb);
|
||||
if (!col_buf || !target_mask) {
|
||||
continue;
|
||||
}
|
||||
if ((key.mrt_mask & (1u << cb)) == 0) {
|
||||
key.color_buffers[cb] = {};
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fill color blending information
|
||||
if (regs.blend_control[cb].enable && !col_buf.info.blend_bypass) {
|
||||
@@ -371,22 +393,21 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
// Apply swizzle to target mask
|
||||
key.write_masks[cb] =
|
||||
vk::ColorComponentFlags{key.color_buffers[cb].swizzle.ApplyMask(target_mask)};
|
||||
|
||||
// Fill color samples
|
||||
const u8 prev_color_samples = std::exchange(color_samples, col_buf.NumSamples());
|
||||
all_color_samples_same &= color_samples == prev_color_samples || prev_color_samples == 0;
|
||||
key.color_samples[cb] = color_samples;
|
||||
key.num_samples = std::max(key.num_samples, color_samples);
|
||||
}
|
||||
|
||||
// Compile and bind shader stages
|
||||
if (!RefreshGraphicsStages()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Second pass to mask out render targets not written by fragment shader
|
||||
for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) {
|
||||
const auto& col_buf = regs.color_buffers[cb];
|
||||
if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
|
||||
continue;
|
||||
}
|
||||
if ((key.mrt_mask & (1u << cb)) == 0) {
|
||||
// Attachment is bound and mask allows writes but shader does not output to it.
|
||||
key.color_buffers[cb] = {};
|
||||
// Force all color samples to match depth samples to avoid unsupported MSAA configuration
|
||||
if (color_samples != 0) {
|
||||
const bool depth_mismatch = db_enabled && color_samples != key.depth_samples;
|
||||
if (!all_color_samples_same && !instance.IsMixedAnySamplesSupported() ||
|
||||
all_color_samples_same && depth_mismatch && !instance.IsMixedDepthSamplesSupported()) {
|
||||
key.color_samples.fill(key.depth_samples);
|
||||
key.num_samples = key.depth_samples;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,26 +3,21 @@
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/elf_info.h"
|
||||
#include "common/singleton.h"
|
||||
#include "core/debug_state.h"
|
||||
#include "core/devtools/layer.h"
|
||||
#include "core/libraries/system/systemservice.h"
|
||||
#include "imgui/renderer/imgui_core.h"
|
||||
#include "imgui/renderer/imgui_impl_vulkan.h"
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_presenter.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
|
||||
#include "video_core/host_shaders/fs_tri_vert.h"
|
||||
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
#include <imgui.h>
|
||||
|
||||
#include "common/elf_info.h"
|
||||
#include "imgui/renderer/imgui_impl_vulkan.h"
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -291,26 +286,14 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form
|
||||
return {};
|
||||
}
|
||||
|
||||
Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id,
|
||||
const Libraries::VideoOut::PixelFormat format, bool is_eop) {
|
||||
// Request a free presentation frame.
|
||||
Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||
VAddr cpu_address) {
|
||||
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(desc);
|
||||
texture_cache.UpdateImage(image_id);
|
||||
|
||||
Frame* frame = GetRenderFrame();
|
||||
|
||||
// EOP flips are triggered from GPU thread so use the drawing scheduler to record
|
||||
// commands. Otherwise we are dealing with a CPU flip which could have arrived
|
||||
// from any guest thread. Use a separate scheduler for that.
|
||||
auto& scheduler = is_eop ? draw_scheduler : flip_scheduler;
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
bool vk_host_markers_enabled = Config::getVkHostMarkersEnabled();
|
||||
if (vk_host_markers_enabled) {
|
||||
const auto label = fmt::format("PrepareFrameInternal:{}", image_id.index);
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = label.c_str(),
|
||||
});
|
||||
}
|
||||
|
||||
const auto frame_subresources = vk::ImageSubresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
@@ -319,111 +302,116 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
};
|
||||
|
||||
const auto pre_barrier =
|
||||
vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.oldLayout = vk::ImageLayout::eUndefined,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.image = frame->image,
|
||||
.subresourceRange{frame_subresources}};
|
||||
const auto pre_barrier = vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.oldLayout = vk::ImageLayout::eUndefined,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.image = frame->image,
|
||||
.subresourceRange{frame_subresources},
|
||||
};
|
||||
|
||||
draw_scheduler.EndRendering();
|
||||
const auto cmdbuf = draw_scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.imageMemoryBarrierCount = 1,
|
||||
.pImageMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
|
||||
VideoCore::ImageViewInfo view_info{};
|
||||
view_info.format = GetFrameViewFormat(attribute.attrib.pixel_format);
|
||||
// Exclude alpha from output frame to avoid blending with UI.
|
||||
view_info.mapping.a = vk::ComponentSwizzle::eOne;
|
||||
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
auto image_view = *image.FindView(view_info).image_view;
|
||||
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
|
||||
|
||||
const vk::Extent2D image_size = {image.info.size.width, image.info.size.height};
|
||||
expected_ratio = static_cast<float>(image_size.width) / static_cast<float>(image_size.height);
|
||||
|
||||
image_view = fsr_pass.Render(cmdbuf, image_view, image_size, {frame->width, frame->height},
|
||||
fsr_settings, frame->is_hdr);
|
||||
pp_pass.Render(cmdbuf, image_view, image_size, *frame, pp_settings);
|
||||
|
||||
DebugState.game_resolution = {image_size.width, image_size.height};
|
||||
DebugState.output_resolution = {frame->width, frame->height};
|
||||
|
||||
// Flush frame creation commands.
|
||||
frame->ready_semaphore = draw_scheduler.GetMasterSemaphore()->Handle();
|
||||
frame->ready_tick = draw_scheduler.CurrentTick();
|
||||
SubmitInfo info{};
|
||||
draw_scheduler.Flush(info);
|
||||
return frame;
|
||||
}
|
||||
|
||||
Frame* Presenter::PrepareBlankFrame(bool present_thread) {
|
||||
// Request a free presentation frame.
|
||||
Frame* frame = GetRenderFrame();
|
||||
|
||||
auto& scheduler = present_thread ? present_scheduler : draw_scheduler;
|
||||
scheduler.EndRendering();
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
constexpr vk::ImageSubresourceRange simple_subresource = {
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
};
|
||||
const auto pre_barrier = vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.oldLayout = vk::ImageLayout::eUndefined,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.image = frame->image,
|
||||
.subresourceRange = simple_subresource,
|
||||
};
|
||||
|
||||
const auto post_barrier = vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eFragmentShader,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.image = frame->image,
|
||||
.subresourceRange = simple_subresource,
|
||||
};
|
||||
|
||||
const vk::RenderingAttachmentInfo attachment = {
|
||||
.imageView = frame->image_view,
|
||||
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.loadOp = vk::AttachmentLoadOp::eClear,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
};
|
||||
const vk::RenderingInfo rendering_info = {
|
||||
.renderArea =
|
||||
{
|
||||
.extent = {frame->width, frame->height},
|
||||
},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = 1u,
|
||||
.pColorAttachments = &attachment,
|
||||
};
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.imageMemoryBarrierCount = 1,
|
||||
.pImageMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
|
||||
if (image_id != VideoCore::NULL_IMAGE_ID) {
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
vk::Extent2D image_size = {image.info.size.width, image.info.size.height};
|
||||
float ratio = (float)image_size.width / (float)image_size.height;
|
||||
if (ratio != expected_ratio) {
|
||||
expected_ratio = ratio;
|
||||
}
|
||||
|
||||
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {},
|
||||
cmdbuf);
|
||||
|
||||
VideoCore::ImageViewInfo info{};
|
||||
info.format = GetFrameViewFormat(format);
|
||||
// Exclude alpha from output frame to avoid blending with UI.
|
||||
info.mapping = vk::ComponentMapping{
|
||||
.r = vk::ComponentSwizzle::eIdentity,
|
||||
.g = vk::ComponentSwizzle::eIdentity,
|
||||
.b = vk::ComponentSwizzle::eIdentity,
|
||||
.a = vk::ComponentSwizzle::eOne,
|
||||
};
|
||||
vk::ImageView imageView;
|
||||
if (auto view = image.FindView(info)) {
|
||||
imageView = *texture_cache.GetImageView(view).image_view;
|
||||
} else {
|
||||
imageView = *texture_cache.RegisterImageView(image_id, info).image_view;
|
||||
}
|
||||
|
||||
if (vk_host_markers_enabled) {
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = "Host/FSR",
|
||||
});
|
||||
}
|
||||
|
||||
imageView = fsr_pass.Render(cmdbuf, imageView, image_size, {frame->width, frame->height},
|
||||
fsr_settings, frame->is_hdr);
|
||||
|
||||
if (vk_host_markers_enabled) {
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = "Host/Post processing",
|
||||
});
|
||||
}
|
||||
pp_pass.Render(cmdbuf, imageView, image_size, *frame, pp_settings);
|
||||
if (vk_host_markers_enabled) {
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
}
|
||||
|
||||
DebugState.game_resolution = {image_size.width, image_size.height};
|
||||
DebugState.output_resolution = {frame->width, frame->height};
|
||||
} else {
|
||||
// Fix display of garbage images on startup on some drivers
|
||||
const std::array<vk::RenderingAttachmentInfo, 1> attachments = {{
|
||||
{
|
||||
.imageView = frame->image_view,
|
||||
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.loadOp = vk::AttachmentLoadOp::eClear,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
},
|
||||
}};
|
||||
const vk::RenderingInfo rendering_info{
|
||||
.renderArea{
|
||||
.extent{frame->width, frame->height},
|
||||
},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = attachments.size(),
|
||||
.pColorAttachments = attachments.data(),
|
||||
};
|
||||
cmdbuf.beginRendering(rendering_info);
|
||||
cmdbuf.endRendering();
|
||||
}
|
||||
|
||||
const auto post_barrier =
|
||||
vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.image = frame->image,
|
||||
.subresourceRange{frame_subresources}};
|
||||
cmdbuf.beginRendering(rendering_info);
|
||||
cmdbuf.endRendering();
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.imageMemoryBarrierCount = 1,
|
||||
.pImageMemoryBarriers = &post_barrier,
|
||||
});
|
||||
|
||||
if (vk_host_markers_enabled) {
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
}
|
||||
|
||||
// Flush frame creation commands.
|
||||
frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle();
|
||||
frame->ready_tick = scheduler.CurrentTick();
|
||||
|
||||
@@ -66,44 +66,6 @@ public:
|
||||
return window;
|
||||
}
|
||||
|
||||
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||
VAddr cpu_address, bool is_eop) {
|
||||
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(desc);
|
||||
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
|
||||
return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop);
|
||||
}
|
||||
|
||||
Frame* PrepareBlankFrame(bool is_eop) {
|
||||
return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID,
|
||||
Libraries::VideoOut::PixelFormat::Unknown, is_eop);
|
||||
}
|
||||
|
||||
VideoCore::Image& RegisterVideoOutSurface(
|
||||
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
|
||||
vo_buffers_addr.emplace_back(cpu_address);
|
||||
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(desc);
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
image.usage.vo_surface = 1u;
|
||||
return image;
|
||||
}
|
||||
|
||||
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
||||
return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) {
|
||||
return vo_buffer == color_buffer.Address();
|
||||
}) != vo_buffers_addr.cend();
|
||||
}
|
||||
|
||||
void Present(Frame* frame, bool is_reusing_frame = false);
|
||||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||
Frame* PrepareLastFrame();
|
||||
|
||||
void FlushDraw() {
|
||||
SubmitInfo info{};
|
||||
draw_scheduler.Flush(info);
|
||||
}
|
||||
|
||||
Rasterizer& GetRasterizer() const {
|
||||
return *rasterizer.get();
|
||||
}
|
||||
@@ -120,11 +82,33 @@ public:
|
||||
pp_settings.hdr = enable ? 1 : 0;
|
||||
}
|
||||
|
||||
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const {
|
||||
return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend();
|
||||
}
|
||||
|
||||
VideoCore::Image& RegisterVideoOutSurface(
|
||||
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
|
||||
vo_buffers_addr.emplace_back(cpu_address);
|
||||
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(desc);
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
image.usage.vo_surface = 1u;
|
||||
return image;
|
||||
}
|
||||
|
||||
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||
VAddr cpu_address);
|
||||
|
||||
Frame* PrepareBlankFrame(bool present_thread);
|
||||
|
||||
void Present(Frame* frame, bool is_reusing_frame = false);
|
||||
Frame* PrepareLastFrame();
|
||||
|
||||
private:
|
||||
Frame* PrepareFrameInternal(VideoCore::ImageId image_id,
|
||||
Libraries::VideoOut::PixelFormat format, bool is_eop = true);
|
||||
Frame* GetRenderFrame();
|
||||
|
||||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||
|
||||
void SetExpectedGameSize(s32 width, s32 height);
|
||||
|
||||
private:
|
||||
|
||||
@@ -107,128 +107,44 @@ bool Rasterizer::FilterDraw() {
|
||||
return true;
|
||||
}
|
||||
|
||||
RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
||||
// Prefetch color and depth buffers to let texture cache handle possible overlaps with bound
|
||||
// textures (e.g. mipgen)
|
||||
RenderState state;
|
||||
state.width = instance.GetMaxFramebufferWidth();
|
||||
state.height = instance.GetMaxFramebufferHeight();
|
||||
state.num_layers = std::numeric_limits<u32>::max();
|
||||
state.num_color_attachments = std::bit_width(mrt_mask);
|
||||
|
||||
cb_descs.clear();
|
||||
db_desc.reset();
|
||||
|
||||
void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) {
|
||||
// Prefetch render targets to handle overlaps with bound textures (e.g. mipgen)
|
||||
const auto& key = pipeline->GetGraphicsKey();
|
||||
const auto& regs = liverpool->regs;
|
||||
|
||||
if (regs.color_control.degamma_enable) {
|
||||
LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction");
|
||||
}
|
||||
|
||||
const bool skip_cb_binding =
|
||||
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||
|
||||
for (s32 cb = 0; cb < state.num_color_attachments && !skip_cb_binding; ++cb) {
|
||||
for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) {
|
||||
auto& [image_id, desc] = cb_descs[cb];
|
||||
const auto& col_buf = regs.color_buffers[cb];
|
||||
if (!col_buf) {
|
||||
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
|
||||
const u32 target_mask = regs.color_target_mask.GetMask(cb);
|
||||
if (skip_cb_binding || !col_buf || !target_mask || (key.mrt_mask & (1 << cb)) == 0) {
|
||||
image_id = {};
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip stale color buffers if shader doesn't output to them. Otherwise it will perform
|
||||
// an unnecessary transition and may result in state conflict if the resource is already
|
||||
// bound for reading.
|
||||
if ((mrt_mask & (1 << cb)) == 0) {
|
||||
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the color buffer is still bound but rendering to it is disabled by the target
|
||||
// mask, we need to prevent the render area from being affected by unbound render target
|
||||
// extents.
|
||||
if (!regs.color_target_mask.GetMask(cb)) {
|
||||
state.color_attachments[cb].imageView = VK_NULL_HANDLE;
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& hint = liverpool->last_cb_extent[cb];
|
||||
auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{},
|
||||
std::tuple{col_buf, hint});
|
||||
const auto& image_view = texture_cache.FindRenderTarget(desc);
|
||||
image_id = bound_images.emplace_back(image_view.image_id);
|
||||
std::construct_at(&desc, col_buf, hint);
|
||||
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
image.binding.is_target = 1u;
|
||||
|
||||
const auto slice = image_view.info.range.base.layer;
|
||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice);
|
||||
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
|
||||
|
||||
const auto mip = image_view.info.range.base.level;
|
||||
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
|
||||
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
|
||||
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||
state.color_attachments[cb] = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue =
|
||||
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
|
||||
};
|
||||
}
|
||||
|
||||
if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) ||
|
||||
(regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) {
|
||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||
const auto& hint = liverpool->last_db_extent;
|
||||
auto& [image_id, desc] =
|
||||
db_desc.emplace(std::piecewise_construct, std::tuple{},
|
||||
std::tuple{regs.depth_buffer, regs.depth_view, regs.depth_control,
|
||||
htile_address, hint});
|
||||
const auto& image_view = texture_cache.FindDepthTarget(desc);
|
||||
image_id = bound_images.emplace_back(image_view.image_id);
|
||||
auto& [image_id, desc] = db_desc;
|
||||
std::construct_at(&desc, regs.depth_buffer, regs.depth_view, regs.depth_control,
|
||||
htile_address, hint);
|
||||
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
image.binding.is_target = 1u;
|
||||
|
||||
const auto slice = image_view.info.range.base.layer;
|
||||
const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
|
||||
texture_cache.IsMetaCleared(htile_address, slice);
|
||||
const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
|
||||
ASSERT(desc.view_info.range.extent.levels == 1);
|
||||
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
state.has_depth = regs.depth_buffer.DepthValid();
|
||||
state.has_stencil = regs.depth_buffer.StencilValid();
|
||||
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||
if (state.has_depth) {
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp =
|
||||
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
|
||||
};
|
||||
}
|
||||
if (state.has_stencil) {
|
||||
state.stencil_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp =
|
||||
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
|
||||
};
|
||||
}
|
||||
texture_cache.TouchMeta(htile_address, slice, false);
|
||||
} else {
|
||||
db_desc.first = {};
|
||||
}
|
||||
|
||||
if (state.num_layers == std::numeric_limits<u32>::max()) {
|
||||
state.num_layers = 1;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
|
||||
@@ -253,28 +169,20 @@ void Rasterizer::EliminateFastClear() {
|
||||
return;
|
||||
}
|
||||
VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]);
|
||||
const auto& image_view = texture_cache.FindRenderTarget(desc);
|
||||
const auto image_id = texture_cache.FindImage(desc);
|
||||
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
|
||||
if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) {
|
||||
return;
|
||||
}
|
||||
for (u32 slice = col_buf.view.slice_start; slice <= col_buf.view.slice_max; ++slice) {
|
||||
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
|
||||
}
|
||||
auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
const vk::ImageSubresourceRange range = {
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = col_buf.view.slice_start,
|
||||
.layerCount = col_buf.view.slice_max - col_buf.view.slice_start + 1,
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
const auto clear_value = LiverpoolToVK::ColorBufferClearValue(col_buf);
|
||||
|
||||
ScopeMarkerBegin(fmt::format("EliminateFastClear:MRT={:#x}:M={:#x}", col_buf.Address(),
|
||||
col_buf.CmaskAddress()));
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
scheduler.CommandBuffer().clearColorImage(image.image, image.last_state.layout,
|
||||
LiverpoolToVK::ColorBufferClearValue(col_buf).color,
|
||||
range);
|
||||
image.Clear(clear_value, desc.view_info.range);
|
||||
ScopeMarkerEnd();
|
||||
}
|
||||
|
||||
@@ -293,18 +201,20 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto state = PrepareRenderState(pipeline->GetMrtMask());
|
||||
PrepareRenderState(pipeline);
|
||||
if (!BindResources(pipeline)) {
|
||||
return;
|
||||
}
|
||||
const auto state = BeginRendering(pipeline);
|
||||
|
||||
buffer_cache.BindVertexBuffers(*pipeline);
|
||||
if (is_indexed) {
|
||||
buffer_cache.BindIndexBuffer(index_offset);
|
||||
}
|
||||
|
||||
BeginRendering(*pipeline, state);
|
||||
UpdateDynamicState(*pipeline, is_indexed);
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
UpdateDynamicState(pipeline, is_indexed);
|
||||
scheduler.BeginRendering(state);
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
|
||||
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||
@@ -339,10 +249,11 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
return;
|
||||
}
|
||||
|
||||
auto state = PrepareRenderState(pipeline->GetMrtMask());
|
||||
PrepareRenderState(pipeline);
|
||||
if (!BindResources(pipeline)) {
|
||||
return;
|
||||
}
|
||||
const auto state = BeginRendering(pipeline);
|
||||
|
||||
buffer_cache.BindVertexBuffers(*pipeline);
|
||||
if (is_indexed) {
|
||||
@@ -358,8 +269,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false);
|
||||
}
|
||||
|
||||
BeginRendering(*pipeline, state);
|
||||
UpdateDynamicState(*pipeline, is_indexed);
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
UpdateDynamicState(pipeline, is_indexed);
|
||||
scheduler.BeginRendering(state);
|
||||
|
||||
// We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and
|
||||
// instance offsets will be automatically applied by Vulkan from indirect args buffer.
|
||||
@@ -411,6 +323,7 @@ void Rasterizer::DispatchDirect() {
|
||||
}
|
||||
|
||||
scheduler.EndRendering();
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||
@@ -434,10 +347,11 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.EndRendering();
|
||||
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
|
||||
|
||||
scheduler.EndRendering();
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||
cmdbuf.dispatchIndirect(buffer->Handle(), base);
|
||||
@@ -480,7 +394,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
|
||||
// Bind resource buffers and textures.
|
||||
Shader::Backend::Bindings binding{};
|
||||
Shader::PushData push_data = MakeUserData(liverpool->regs);
|
||||
push_data = MakeUserData(liverpool->regs);
|
||||
for (const auto* stage : pipeline->GetStages()) {
|
||||
if (!stage) {
|
||||
continue;
|
||||
@@ -505,8 +419,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
|
||||
fault_process_pending |= uses_dma;
|
||||
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -740,7 +652,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
||||
} else {
|
||||
if (auto& old_image = texture_cache.GetImage(image_id);
|
||||
old_image.binding.needs_rebind) {
|
||||
old_image.binding.Reset(); // clean up previous image binding state
|
||||
old_image.binding = {};
|
||||
image_id = texture_cache.FindImage(desc);
|
||||
}
|
||||
|
||||
@@ -781,7 +693,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
||||
image.usage.texture |= !is_storage;
|
||||
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
|
||||
image.last_state.layout);
|
||||
image.backing->state.layout);
|
||||
}
|
||||
|
||||
set_writes.push_back({
|
||||
@@ -816,55 +728,78 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
||||
}
|
||||
}
|
||||
|
||||
void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
|
||||
int cb_index = 0;
|
||||
RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
|
||||
attachment_feedback_loop = false;
|
||||
for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) {
|
||||
if (state.color_attachments[attach_idx].imageView == VK_NULL_HANDLE) {
|
||||
const auto& regs = liverpool->regs;
|
||||
const auto& key = pipeline->GetGraphicsKey();
|
||||
RenderState state;
|
||||
state.width = instance.GetMaxFramebufferWidth();
|
||||
state.height = instance.GetMaxFramebufferHeight();
|
||||
state.num_layers = std::numeric_limits<u32>::max();
|
||||
state.num_color_attachments = std::bit_width(key.mrt_mask);
|
||||
for (auto cb = 0u; cb < state.num_color_attachments; ++cb) {
|
||||
auto& [image_id, desc] = cb_descs[cb];
|
||||
if (!image_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& [image_id, desc] = cb_descs[cb_index++];
|
||||
if (auto& old_img = texture_cache.GetImage(image_id); old_img.binding.needs_rebind) {
|
||||
auto& view = texture_cache.FindRenderTarget(desc);
|
||||
ASSERT(view.image_id != image_id);
|
||||
image_id = bound_images.emplace_back(view.image_id);
|
||||
auto& image = texture_cache.GetImage(view.image_id);
|
||||
state.color_attachments[attach_idx].imageView = *view.image_view;
|
||||
state.color_attachments[attach_idx].imageLayout = image.last_state.layout;
|
||||
|
||||
const auto mip = view.info.range.base.level;
|
||||
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
|
||||
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
|
||||
auto* image = &texture_cache.GetImage(image_id);
|
||||
if (image->binding.needs_rebind) {
|
||||
image_id = bound_images.emplace_back(texture_cache.FindImage(desc));
|
||||
image = &texture_cache.GetImage(image_id);
|
||||
}
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
if (image.binding.is_bound) {
|
||||
ASSERT_MSG(!image.binding.force_general,
|
||||
texture_cache.UpdateImage(image_id);
|
||||
image->SetBackingSamples(key.color_samples[cb]);
|
||||
const auto& image_view = texture_cache.FindRenderTarget(image_id, desc);
|
||||
const auto slice = image_view.info.range.base.layer;
|
||||
const auto mip = image_view.info.range.base.level;
|
||||
|
||||
const auto& col_buf = regs.color_buffers[cb];
|
||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice);
|
||||
texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
|
||||
|
||||
if (image->binding.is_bound) {
|
||||
ASSERT_MSG(!image->binding.force_general,
|
||||
"Having image both as storage and render target is unsupported");
|
||||
image.Transit(instance.IsAttachmentFeedbackLoopLayoutSupported()
|
||||
? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
|
||||
: vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eColorAttachmentWrite, {});
|
||||
image->Transit(instance.IsAttachmentFeedbackLoopLayoutSupported()
|
||||
? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
|
||||
: vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eColorAttachmentWrite, {});
|
||||
attachment_feedback_loop = true;
|
||||
} else {
|
||||
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
|
||||
vk::AccessFlagBits2::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
desc.view_info.range);
|
||||
image->Transit(vk::ImageLayout::eColorAttachmentOptimal,
|
||||
vk::AccessFlagBits2::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
desc.view_info.range);
|
||||
}
|
||||
image.usage.render_target = 1u;
|
||||
state.color_attachments[attach_idx].imageLayout = image.last_state.layout;
|
||||
|
||||
state.width = std::min<u32>(state.width, std::max(image->info.size.width >> mip, 1u));
|
||||
state.height = std::min<u32>(state.height, std::max(image->info.size.height >> mip, 1u));
|
||||
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||
state.color_attachments[cb] = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image->backing->state.layout,
|
||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue =
|
||||
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
|
||||
};
|
||||
image->usage.render_target = 1u;
|
||||
}
|
||||
|
||||
if (db_desc) {
|
||||
const auto& image_id = std::get<0>(*db_desc);
|
||||
const auto& desc = std::get<1>(*db_desc);
|
||||
if (auto image_id = db_desc.first; image_id) {
|
||||
auto& desc = db_desc.second;
|
||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||
const auto& image_view = texture_cache.FindDepthTarget(image_id, desc);
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
ASSERT(image.binding.needs_rebind == 0);
|
||||
const bool has_stencil = image.usage.stencil;
|
||||
if (has_stencil) {
|
||||
image.aspect_mask |= vk::ImageAspectFlagBits::eStencil;
|
||||
}
|
||||
|
||||
const auto slice = image_view.info.range.base.layer;
|
||||
const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
|
||||
texture_cache.IsMetaCleared(htile_address, slice);
|
||||
const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
|
||||
texture_cache.TouchMeta(htile_address, slice, false);
|
||||
ASSERT(desc.view_info.range.extent.levels == 1 && !image.binding.needs_rebind);
|
||||
|
||||
const bool has_stencil = image.info.props.has_stencil;
|
||||
const auto new_layout = desc.view_info.is_storage
|
||||
? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
: vk::ImageLayout::eDepthAttachmentOptimal
|
||||
@@ -874,13 +809,41 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentRead,
|
||||
desc.view_info.range);
|
||||
state.depth_attachment.imageLayout = image.last_state.layout;
|
||||
state.stencil_attachment.imageLayout = image.last_state.layout;
|
||||
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
state.has_depth = regs.depth_buffer.DepthValid();
|
||||
state.has_stencil = regs.depth_buffer.StencilValid();
|
||||
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||
if (state.has_depth) {
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image.backing->state.layout,
|
||||
.loadOp =
|
||||
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
|
||||
};
|
||||
}
|
||||
if (state.has_stencil) {
|
||||
state.stencil_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image.backing->state.layout,
|
||||
.loadOp =
|
||||
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
|
||||
};
|
||||
}
|
||||
|
||||
image.usage.depth_target = true;
|
||||
image.usage.stencil = has_stencil;
|
||||
}
|
||||
|
||||
scheduler.BeginRendering(state);
|
||||
if (state.num_layers == std::numeric_limits<u32>::max()) {
|
||||
state.num_layers = 1;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
void Rasterizer::Resolve() {
|
||||
@@ -904,66 +867,7 @@ void Rasterizer::Resolve() {
|
||||
ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}",
|
||||
liverpool->regs.color_buffers[0].Address(),
|
||||
liverpool->regs.color_buffers[1].Address()));
|
||||
|
||||
mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
|
||||
mrt0_range);
|
||||
mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||
mrt1_range);
|
||||
|
||||
if (mrt0_image.info.num_samples == 1) {
|
||||
// Vulkan does not allow resolve from a single sample image, so change it to a copy.
|
||||
// Note that resolving a single-sampled image doesn't really make sense, but a game might do
|
||||
// it.
|
||||
vk::ImageCopy region = {
|
||||
.srcSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt0_range.base.layer,
|
||||
.layerCount = mrt0_range.extent.layers,
|
||||
},
|
||||
.srcOffset = {0, 0, 0},
|
||||
.dstSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt1_range.base.layer,
|
||||
.layerCount = mrt1_range.extent.layers,
|
||||
},
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
|
||||
};
|
||||
scheduler.CommandBuffer().copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
mrt1_image.image, vk::ImageLayout::eTransferDstOptimal,
|
||||
region);
|
||||
} else {
|
||||
vk::ImageResolve region = {
|
||||
.srcSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt0_range.base.layer,
|
||||
.layerCount = mrt0_range.extent.layers,
|
||||
},
|
||||
.srcOffset = {0, 0, 0},
|
||||
.dstSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt1_range.base.layer,
|
||||
.layerCount = mrt1_range.extent.layers,
|
||||
},
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
|
||||
};
|
||||
scheduler.CommandBuffer().resolveImage(
|
||||
mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
|
||||
vk::ImageLayout::eTransferDstOptimal, region);
|
||||
}
|
||||
|
||||
mrt1_image.flags |= VideoCore::ImageFlagBits::GpuModified;
|
||||
mrt1_image.flags &= ~VideoCore::ImageFlagBits::Dirty;
|
||||
|
||||
mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range);
|
||||
ScopeMarkerEnd();
|
||||
}
|
||||
|
||||
@@ -1020,9 +924,9 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
|
||||
};
|
||||
scheduler.CommandBuffer().copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
write_image.image, vk::ImageLayout::eTransferDstOptimal,
|
||||
region);
|
||||
scheduler.CommandBuffer().copyImage(read_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal,
|
||||
write_image.GetImage(),
|
||||
vk::ImageLayout::eTransferDstOptimal, region);
|
||||
|
||||
ScopeMarkerEnd();
|
||||
}
|
||||
@@ -1090,18 +994,14 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
|
||||
}
|
||||
}
|
||||
|
||||
void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline, const bool is_indexed) const {
|
||||
void Rasterizer::UpdateDynamicState(const GraphicsPipeline* pipeline, const bool is_indexed) const {
|
||||
UpdateViewportScissorState();
|
||||
UpdateDepthStencilState();
|
||||
UpdatePrimitiveState(is_indexed);
|
||||
UpdateRasterizationState();
|
||||
UpdateColorBlendingState(pipeline);
|
||||
|
||||
auto& dynamic_state = scheduler.GetDynamicState();
|
||||
dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
|
||||
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
|
||||
dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop);
|
||||
|
||||
// Commit new dynamic state to the command buffer.
|
||||
dynamic_state.Commit(instance, scheduler.CommandBuffer());
|
||||
}
|
||||
|
||||
@@ -1320,6 +1220,14 @@ void Rasterizer::UpdateRasterizationState() const {
|
||||
dynamic_state.SetLineWidth(regs.line_control.Width());
|
||||
}
|
||||
|
||||
void Rasterizer::UpdateColorBlendingState(const GraphicsPipeline* pipeline) const {
|
||||
const auto& regs = liverpool->regs;
|
||||
auto& dynamic_state = scheduler.GetDynamicState();
|
||||
dynamic_state.SetBlendConstants(regs.blend_constants);
|
||||
dynamic_state.SetColorWriteMasks(pipeline->GetGraphicsKey().write_masks);
|
||||
dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop);
|
||||
}
|
||||
|
||||
void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) {
|
||||
if ((from_guest && !Config::getVkGuestMarkersEnabled()) ||
|
||||
(!from_guest && !Config::getVkHostMarkersEnabled())) {
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <shared_mutex>
|
||||
#include "common/recursive_lock.h"
|
||||
#include "common/shared_first_mutex.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
@@ -84,29 +83,29 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
RenderState PrepareRenderState(u32 mrt_mask);
|
||||
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
|
||||
void PrepareRenderState(const GraphicsPipeline* pipeline);
|
||||
RenderState BeginRendering(const GraphicsPipeline* pipeline);
|
||||
void Resolve();
|
||||
void DepthStencilCopy(bool is_depth, bool is_stencil);
|
||||
void EliminateFastClear();
|
||||
|
||||
void UpdateDynamicState(const GraphicsPipeline& pipeline, bool is_indexed) const;
|
||||
void UpdateDynamicState(const GraphicsPipeline* pipeline, bool is_indexed) const;
|
||||
void UpdateViewportScissorState() const;
|
||||
void UpdateDepthStencilState() const;
|
||||
void UpdatePrimitiveState(bool is_indexed) const;
|
||||
void UpdateRasterizationState() const;
|
||||
void UpdateColorBlendingState(const GraphicsPipeline* pipeline) const;
|
||||
|
||||
bool FilterDraw();
|
||||
|
||||
void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Shader::PushData& push_data);
|
||||
|
||||
void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding);
|
||||
|
||||
bool BindResources(const Pipeline* pipeline);
|
||||
|
||||
void ResetBindings() {
|
||||
for (auto& image_id : bound_images) {
|
||||
texture_cache.GetImage(image_id).binding.Reset();
|
||||
texture_cache.GetImage(image_id).binding = {};
|
||||
}
|
||||
bound_images.clear();
|
||||
}
|
||||
@@ -128,16 +127,17 @@ private:
|
||||
Common::SharedFirstMutex mapped_ranges_mutex;
|
||||
PipelineCache pipeline_cache;
|
||||
|
||||
boost::container::static_vector<
|
||||
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
|
||||
cb_descs;
|
||||
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
|
||||
using RenderTargetInfo =
|
||||
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>;
|
||||
std::array<RenderTargetInfo, Liverpool::NumColorBuffers> cb_descs;
|
||||
std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc> db_desc;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, Shader::NumImages> image_infos;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NumBuffers> buffer_infos;
|
||||
boost::container::static_vector<VideoCore::ImageId, Shader::NumImages> bound_images;
|
||||
|
||||
Pipeline::DescriptorWrites set_writes;
|
||||
Pipeline::BufferBarriers buffer_barriers;
|
||||
Shader::PushData push_data;
|
||||
|
||||
using BufferBindingInfo = std::tuple<VideoCore::BufferId, AmdGpu::Buffer, u64>;
|
||||
boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "video_core/host_shaders/color_to_ms_depth_frag.h"
|
||||
#include "video_core/host_shaders/fs_tri_vert.h"
|
||||
#include "video_core/host_shaders/ms_image_blit_frag.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
@@ -35,19 +36,23 @@ BlitHelper::BlitHelper(const Vulkan::Instance& instance_, Vulkan::Scheduler& sch
|
||||
CreatePipelineLayouts();
|
||||
}
|
||||
|
||||
BlitHelper::~BlitHelper() = default;
|
||||
|
||||
void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
|
||||
source.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
|
||||
dest.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});
|
||||
BlitHelper::~BlitHelper() {
|
||||
const auto device = instance.GetDevice();
|
||||
device.destroy(fs_tri_vertex);
|
||||
device.destroy(color_to_ms_depth_frag);
|
||||
device.destroy(src_msaa_copy_frag);
|
||||
device.destroy(src_non_msaa_copy_frag);
|
||||
}
|
||||
|
||||
void BlitHelper::ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples,
|
||||
vk::Format src_pixel_format, vk::Format dst_pixel_format,
|
||||
vk::Image source, vk::Image dest) {
|
||||
const vk::ImageViewUsageCreateInfo color_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
|
||||
const vk::ImageViewCreateInfo color_view_ci = {
|
||||
.pNext = &color_usage_ci,
|
||||
.image = source.image,
|
||||
.image = source,
|
||||
.viewType = vk::ImageViewType::e2D,
|
||||
.format = source.info.pixel_format,
|
||||
.format = src_pixel_format,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0U,
|
||||
@@ -64,9 +69,9 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
|
||||
.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment};
|
||||
const vk::ImageViewCreateInfo depth_view_ci = {
|
||||
.pNext = &depth_usage_ci,
|
||||
.image = dest.image,
|
||||
.image = dest,
|
||||
.viewType = vk::ImageViewType::e2D,
|
||||
.format = dest.info.pixel_format,
|
||||
.format = dst_pixel_format,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth,
|
||||
.baseMipLevel = 0U,
|
||||
@@ -86,8 +91,8 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
|
||||
|
||||
Vulkan::RenderState state{};
|
||||
state.has_depth = true;
|
||||
state.width = dest.info.size.width;
|
||||
state.height = dest.info.size.height;
|
||||
state.width = width;
|
||||
state.height = height;
|
||||
state.depth_attachment = vk::RenderingAttachmentInfo{
|
||||
.imageView = depth_view,
|
||||
.imageLayout = vk::ImageLayout::eDepthAttachmentOptimal,
|
||||
@@ -114,9 +119,13 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
|
||||
texture_write);
|
||||
|
||||
const DepthPipelineKey key{dest.info.num_samples, dest.info.pixel_format};
|
||||
const vk::Pipeline depth_pipeline = GetDepthToMsPipeline(key);
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_pipeline);
|
||||
const MsPipelineKey key{num_samples, dst_pixel_format, false};
|
||||
auto it = std::ranges::find(color_to_ms_depth_pl, key, &MsPipeline::first);
|
||||
if (it == color_to_ms_depth_pl.end()) {
|
||||
CreateColorToMSDepthPipeline(key);
|
||||
it = --color_to_ms_depth_pl.end();
|
||||
}
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second);
|
||||
|
||||
const vk::Viewport viewport = {
|
||||
.x = 0,
|
||||
@@ -136,24 +145,122 @@ void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
|
||||
|
||||
cmdbuf.draw(3, 1, 0, 0);
|
||||
|
||||
scheduler.EndRendering();
|
||||
scheduler.GetDynamicState().Invalidate();
|
||||
}
|
||||
|
||||
vk::Pipeline BlitHelper::GetDepthToMsPipeline(const DepthPipelineKey& key) {
|
||||
auto it = std::ranges::find(color_to_ms_depth_pl, key, &DepthPipeline::first);
|
||||
if (it != color_to_ms_depth_pl.end()) {
|
||||
return *it->second;
|
||||
void BlitHelper::CopyBetweenMsImages(u32 width, u32 height, u32 num_samples,
|
||||
vk::Format pixel_format, bool src_msaa, vk::Image source,
|
||||
vk::Image dest) {
|
||||
const vk::ImageViewUsageCreateInfo src_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
|
||||
const vk::ImageViewCreateInfo src_view_ci = {
|
||||
.pNext = &src_usage_ci,
|
||||
.image = source,
|
||||
.viewType = vk::ImageViewType::e2D,
|
||||
.format = pixel_format,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0U,
|
||||
.levelCount = 1U,
|
||||
.baseArrayLayer = 0U,
|
||||
.layerCount = 1U,
|
||||
},
|
||||
};
|
||||
const auto [src_view_result, src_view] = instance.GetDevice().createImageView(src_view_ci);
|
||||
ASSERT_MSG(src_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
|
||||
vk::to_string(src_view_result));
|
||||
|
||||
const vk::ImageViewUsageCreateInfo dst_usage_ci{.usage =
|
||||
vk::ImageUsageFlagBits::eColorAttachment};
|
||||
const vk::ImageViewCreateInfo dst_view_ci = {
|
||||
.pNext = &dst_usage_ci,
|
||||
.image = dest,
|
||||
.viewType = vk::ImageViewType::e2D,
|
||||
.format = pixel_format,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0U,
|
||||
.levelCount = 1U,
|
||||
.baseArrayLayer = 0U,
|
||||
.layerCount = 1U,
|
||||
},
|
||||
};
|
||||
const auto [dst_view_result, dst_view] = instance.GetDevice().createImageView(dst_view_ci);
|
||||
ASSERT_MSG(dst_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
|
||||
vk::to_string(dst_view_result));
|
||||
scheduler.DeferOperation([device = instance.GetDevice(), src_view, dst_view] {
|
||||
device.destroyImageView(src_view);
|
||||
device.destroyImageView(dst_view);
|
||||
});
|
||||
|
||||
Vulkan::RenderState state{};
|
||||
state.width = width;
|
||||
state.height = height;
|
||||
state.color_attachments[state.num_color_attachments++] = vk::RenderingAttachmentInfo{
|
||||
.imageView = dst_view,
|
||||
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.loadOp = vk::AttachmentLoadOp::eDontCare,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
};
|
||||
scheduler.BeginRendering(state);
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const vk::DescriptorImageInfo image_info = {
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = src_view,
|
||||
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
|
||||
};
|
||||
const vk::WriteDescriptorSet texture_write = {
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = 0U,
|
||||
.dstArrayElement = 0U,
|
||||
.descriptorCount = 1U,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_info,
|
||||
};
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
|
||||
texture_write);
|
||||
|
||||
const MsPipelineKey key{num_samples, pixel_format, src_msaa};
|
||||
auto it = std::ranges::find(ms_image_copy_pl, key, &MsPipeline::first);
|
||||
if (it == ms_image_copy_pl.end()) {
|
||||
CreateMsCopyPipeline(key);
|
||||
it = --ms_image_copy_pl.end();
|
||||
}
|
||||
CreateColorToMSDepthPipeline(key);
|
||||
return *color_to_ms_depth_pl.back().second;
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *it->second);
|
||||
|
||||
const vk::Viewport viewport = {
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
.width = float(state.width),
|
||||
.height = float(state.height),
|
||||
.minDepth = 0.f,
|
||||
.maxDepth = 1.f,
|
||||
};
|
||||
cmdbuf.setViewportWithCount(viewport);
|
||||
|
||||
const vk::Rect2D scissor = {
|
||||
.offset = {0, 0},
|
||||
.extent = {state.width, state.height},
|
||||
};
|
||||
cmdbuf.setScissorWithCount(scissor);
|
||||
|
||||
cmdbuf.draw(3, 1, 0, 0);
|
||||
|
||||
scheduler.EndRendering();
|
||||
scheduler.GetDynamicState().Invalidate();
|
||||
}
|
||||
|
||||
void BlitHelper::CreateShaders() {
|
||||
fs_tri_vertex = Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex,
|
||||
instance.GetDevice());
|
||||
color_to_ms_depth_frag =
|
||||
Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG, vk::ShaderStageFlagBits::eFragment,
|
||||
instance.GetDevice());
|
||||
const auto device = instance.GetDevice();
|
||||
fs_tri_vertex =
|
||||
Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex, device);
|
||||
color_to_ms_depth_frag = Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG,
|
||||
vk::ShaderStageFlagBits::eFragment, device);
|
||||
src_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG,
|
||||
vk::ShaderStageFlagBits::eFragment, device, {"SRC_MSAA"});
|
||||
src_non_msaa_copy_frag = Vulkan::Compile(HostShaders::MS_IMAGE_BLIT_FRAG,
|
||||
vk::ShaderStageFlagBits::eFragment, device);
|
||||
}
|
||||
|
||||
void BlitHelper::CreatePipelineLayouts() {
|
||||
@@ -186,7 +293,7 @@ void BlitHelper::CreatePipelineLayouts() {
|
||||
single_texture_pl_layout = std::move(pipeline_layout);
|
||||
}
|
||||
|
||||
void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
|
||||
void BlitHelper::CreateColorToMSDepthPipeline(const MsPipelineKey& key) {
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.topology = vk::PrimitiveTopology::eTriangleList,
|
||||
};
|
||||
@@ -220,7 +327,7 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
|
||||
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
|
||||
.colorAttachmentCount = 0U,
|
||||
.pColorAttachmentFormats = nullptr,
|
||||
.depthAttachmentFormat = key.depth_format,
|
||||
.depthAttachmentFormat = key.attachment_format,
|
||||
.stencilAttachmentFormat = vk::Format::eUndefined,
|
||||
};
|
||||
|
||||
@@ -253,4 +360,83 @@ void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
|
||||
color_to_ms_depth_pl.emplace_back(key, std::move(pipeline));
|
||||
}
|
||||
|
||||
void BlitHelper::CreateMsCopyPipeline(const MsPipelineKey& key) {
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.topology = vk::PrimitiveTopology::eTriangleList,
|
||||
};
|
||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||
.rasterizationSamples = ToSampleCount(key.num_samples),
|
||||
};
|
||||
const vk::PipelineDepthStencilStateCreateInfo depth_state = {
|
||||
.depthTestEnable = false,
|
||||
.depthWriteEnable = false,
|
||||
.depthCompareOp = vk::CompareOp::eAlways,
|
||||
};
|
||||
const std::array dynamic_states = {vk::DynamicState::eViewportWithCount,
|
||||
vk::DynamicState::eScissorWithCount};
|
||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||
.pDynamicStates = dynamic_states.data(),
|
||||
};
|
||||
|
||||
std::array<vk::PipelineShaderStageCreateInfo, 2> shader_stages;
|
||||
shader_stages[0] = {
|
||||
.stage = vk::ShaderStageFlagBits::eVertex,
|
||||
.module = fs_tri_vertex,
|
||||
.pName = "main",
|
||||
};
|
||||
shader_stages[1] = {
|
||||
.stage = vk::ShaderStageFlagBits::eFragment,
|
||||
.module = key.src_msaa ? src_msaa_copy_frag : src_non_msaa_copy_frag,
|
||||
.pName = "main",
|
||||
};
|
||||
|
||||
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
|
||||
.colorAttachmentCount = 1u,
|
||||
.pColorAttachmentFormats = &key.attachment_format,
|
||||
.depthAttachmentFormat = vk::Format::eUndefined,
|
||||
.stencilAttachmentFormat = vk::Format::eUndefined,
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendAttachmentState attachment = {
|
||||
.blendEnable = false,
|
||||
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
|
||||
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = false,
|
||||
.logicOp = vk::LogicOp::eCopy,
|
||||
.attachmentCount = 1u,
|
||||
.pAttachments = &attachment,
|
||||
};
|
||||
const vk::PipelineViewportStateCreateInfo viewport_info{};
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info{};
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state{.lineWidth = 1.f};
|
||||
|
||||
const vk::GraphicsPipelineCreateInfo pipeline_info = {
|
||||
.pNext = &pipeline_rendering_ci,
|
||||
.stageCount = static_cast<u32>(shader_stages.size()),
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = &vertex_input_info,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pViewportState = &viewport_info,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
.pDepthStencilState = &depth_state,
|
||||
.pColorBlendState = &color_blending,
|
||||
.pDynamicState = &dynamic_info,
|
||||
.layout = *single_texture_pl_layout,
|
||||
};
|
||||
|
||||
auto [pipeline_result, pipeline] =
|
||||
instance.GetDevice().createGraphicsPipelineUnique(VK_NULL_HANDLE, pipeline_info);
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
Vulkan::SetObjectName(instance.GetDevice(), *pipeline, "Non MS Image to MS Image {}",
|
||||
key.num_samples);
|
||||
|
||||
ms_image_copy_pl.emplace_back(key, std::move(pipeline));
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
@@ -17,6 +17,7 @@ namespace VideoCore {
|
||||
|
||||
class Image;
|
||||
class ImageView;
|
||||
struct ImageInfo;
|
||||
|
||||
class BlitHelper {
|
||||
static constexpr size_t MaxMsPipelines = 6;
|
||||
@@ -25,20 +26,26 @@ public:
|
||||
explicit BlitHelper(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||
~BlitHelper();
|
||||
|
||||
void BlitColorToMsDepth(Image& source, Image& dest);
|
||||
void ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_samples,
|
||||
vk::Format src_pixel_format, vk::Format dst_pixel_format,
|
||||
vk::Image source, vk::Image dest);
|
||||
|
||||
void CopyBetweenMsImages(u32 width, u32 height, u32 num_samples, vk::Format pixel_format,
|
||||
bool src_msaa, vk::Image source, vk::Image dest);
|
||||
|
||||
private:
|
||||
void CreateShaders();
|
||||
void CreatePipelineLayouts();
|
||||
|
||||
struct DepthPipelineKey {
|
||||
struct MsPipelineKey {
|
||||
u32 num_samples;
|
||||
vk::Format depth_format;
|
||||
vk::Format attachment_format;
|
||||
bool src_msaa;
|
||||
|
||||
auto operator<=>(const DepthPipelineKey&) const noexcept = default;
|
||||
auto operator<=>(const MsPipelineKey&) const noexcept = default;
|
||||
};
|
||||
vk::Pipeline GetDepthToMsPipeline(const DepthPipelineKey& key);
|
||||
void CreateColorToMSDepthPipeline(const DepthPipelineKey& key);
|
||||
void CreateColorToMSDepthPipeline(const MsPipelineKey& key);
|
||||
void CreateMsCopyPipeline(const MsPipelineKey& key);
|
||||
|
||||
private:
|
||||
const Vulkan::Instance& instance;
|
||||
@@ -47,9 +54,12 @@ private:
|
||||
vk::UniquePipelineLayout single_texture_pl_layout;
|
||||
vk::ShaderModule fs_tri_vertex;
|
||||
vk::ShaderModule color_to_ms_depth_frag;
|
||||
vk::ShaderModule src_msaa_copy_frag;
|
||||
vk::ShaderModule src_non_msaa_copy_frag;
|
||||
|
||||
using DepthPipeline = std::pair<DepthPipelineKey, vk::UniquePipeline>;
|
||||
std::vector<DepthPipeline> color_to_ms_depth_pl{};
|
||||
using MsPipeline = std::pair<MsPipelineKey, vk::UniquePipeline>;
|
||||
std::vector<MsPipeline> color_to_ms_depth_pl;
|
||||
std::vector<MsPipeline> ms_image_copy_pl;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/blit_helper.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
|
||||
#include <vk_mem_alloc.h>
|
||||
@@ -75,11 +76,6 @@ static vk::FormatFeatureFlags2 FormatFeatureFlags(const vk::ImageUsageFlags usag
|
||||
return feature_flags;
|
||||
}
|
||||
|
||||
UniqueImage::UniqueImage() {}
|
||||
|
||||
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
|
||||
: device{device_}, allocator{allocator_} {}
|
||||
|
||||
UniqueImage::~UniqueImage() {
|
||||
if (image) {
|
||||
vmaDestroyImage(allocator, image, allocation);
|
||||
@@ -87,9 +83,8 @@ UniqueImage::~UniqueImage() {
|
||||
}
|
||||
|
||||
void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
|
||||
if (image) {
|
||||
vmaDestroyImage(allocator, image, allocation);
|
||||
}
|
||||
this->image_ci = image_ci;
|
||||
ASSERT(!image);
|
||||
const VmaAllocationCreateInfo alloc_info = {
|
||||
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
|
||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
@@ -109,9 +104,10 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
|
||||
}
|
||||
|
||||
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
BlitHelper& blit_helper_, Common::SlotVector<ImageView>& slot_image_views_,
|
||||
const ImageInfo& info_)
|
||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||
image{instance->GetDevice(), instance->GetAllocator()} {
|
||||
: instance{&instance_}, scheduler{&scheduler_}, blit_helper{&blit_helper_},
|
||||
slot_image_views{&slot_image_views_}, info{info_} {
|
||||
if (info.pixel_format == vk::Format::eUndefined) {
|
||||
return;
|
||||
}
|
||||
@@ -130,20 +126,11 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
|
||||
usage_flags = ImageUsageFlags(instance, info);
|
||||
format_features = FormatFeatureFlags(usage_flags);
|
||||
|
||||
switch (info.pixel_format) {
|
||||
case vk::Format::eD16Unorm:
|
||||
case vk::Format::eD32Sfloat:
|
||||
case vk::Format::eX8D24UnormPack32:
|
||||
if (info.props.is_depth) {
|
||||
aspect_mask = vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
if (info.props.has_stencil) {
|
||||
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr auto tiling = vk::ImageTiling::eOptimal;
|
||||
@@ -162,10 +149,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
vk::to_string(supported_format), vk::to_string(format_info.type),
|
||||
vk::to_string(format_info.flags), vk::to_string(format_info.usage));
|
||||
}
|
||||
const auto supported_samples =
|
||||
image_format_properties.result == vk::Result::eSuccess
|
||||
? image_format_properties.value.imageFormatProperties.sampleCounts
|
||||
: vk::SampleCountFlagBits::e1;
|
||||
supported_samples = image_format_properties.result == vk::Result::eSuccess
|
||||
? image_format_properties.value.imageFormatProperties.sampleCounts
|
||||
: vk::SampleCountFlagBits::e1;
|
||||
|
||||
const vk::ImageCreateInfo image_ci = {
|
||||
.flags = flags,
|
||||
@@ -184,22 +170,48 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
.initialLayout = vk::ImageLayout::eUndefined,
|
||||
};
|
||||
|
||||
image.Create(image_ci);
|
||||
backing = &backing_images.emplace_back();
|
||||
backing->num_samples = info.num_samples;
|
||||
backing->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()};
|
||||
backing->image.Create(image_ci);
|
||||
|
||||
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {} {:#x}:{:#x}",
|
||||
info.size.width, info.size.height, info.size.depth,
|
||||
AmdGpu::NameOf(info.tile_mode), info.guest_address, info.guest_size);
|
||||
Vulkan::SetObjectName(instance->GetDevice(), GetImage(),
|
||||
"Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{}", info.size.width,
|
||||
info.size.height, info.size.depth, AmdGpu::NameOf(info.tile_mode),
|
||||
vk::to_string(info.pixel_format), info.guest_address, info.guest_size,
|
||||
info.resources.layers, info.resources.levels, info.num_samples);
|
||||
}
|
||||
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range) {
|
||||
Image::~Image() = default;
|
||||
|
||||
ImageView& Image::FindView(const ImageViewInfo& view_info, bool ensure_guest_samples) {
|
||||
if (ensure_guest_samples && backing->num_samples > 1 != info.num_samples > 1) {
|
||||
SetBackingSamples(info.num_samples);
|
||||
}
|
||||
const auto& view_infos = backing->image_view_infos;
|
||||
const auto it = std::ranges::find(view_infos, view_info);
|
||||
if (it != view_infos.end()) {
|
||||
const auto view_id = backing->image_view_ids[std::distance(view_infos.begin(), it)];
|
||||
return (*slot_image_views)[view_id];
|
||||
}
|
||||
const auto view_id = slot_image_views->insert(*instance, view_info, *this);
|
||||
backing->image_view_infos.emplace_back(view_info);
|
||||
backing->image_view_ids.emplace_back(view_id);
|
||||
return (*slot_image_views)[view_id];
|
||||
}
|
||||
|
||||
Image::Barriers Image::GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage,
|
||||
std::optional<SubresourceRange> subres_range) {
|
||||
auto& last_state = backing->state;
|
||||
auto& subresource_states = backing->subresource_states;
|
||||
|
||||
const bool needs_partial_transition =
|
||||
subres_range &&
|
||||
(subres_range->base != SubresourceBase{} || subres_range->extent != info.resources);
|
||||
const bool partially_transited = !subresource_states.empty();
|
||||
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> barriers{};
|
||||
Barriers barriers;
|
||||
if (needs_partial_transition || partially_transited) {
|
||||
if (!partially_transited) {
|
||||
subresource_states.resize(info.resources.levels * info.resources.layers);
|
||||
@@ -238,7 +250,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
.newLayout = dst_layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.image = GetImage(),
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = mip,
|
||||
@@ -271,7 +283,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
.newLayout = dst_layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.image = GetImage(),
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
@@ -289,7 +301,7 @@ boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
return barriers;
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf /*= {}*/) {
|
||||
// Adjust pipieline stage
|
||||
const vk::PipelineStageFlags2 dst_pl_stage =
|
||||
@@ -314,33 +326,91 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> d
|
||||
});
|
||||
}
|
||||
|
||||
void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||
void Image::Upload(std::span<const vk::BufferImageCopy> upload_copies, vk::Buffer buffer,
|
||||
u64 offset) {
|
||||
SetBackingSamples(info.num_samples, false);
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
// Copy to the image.
|
||||
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
|
||||
? vk::ImageAspectFlagBits::eDepth
|
||||
: aspect_mask;
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = info.pitch,
|
||||
.bufferImageHeight = info.size.height,
|
||||
.imageSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {info.size.width, info.size.height, 1},
|
||||
const vk::BufferMemoryBarrier2 pre_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = info.guest_size,
|
||||
};
|
||||
|
||||
const vk::BufferMemoryBarrier2 post_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = info.guest_size,
|
||||
};
|
||||
const auto image_barriers =
|
||||
GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||
vk::PipelineStageFlagBits2::eCopy, {});
|
||||
const auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
|
||||
.pImageMemoryBarriers = image_barriers.data(),
|
||||
});
|
||||
cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal,
|
||||
upload_copies);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
flags &= ~ImageFlagBits::Dirty;
|
||||
}
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
void Image::Download(std::span<const vk::BufferImageCopy> download_copies, vk::Buffer buffer,
|
||||
u64 offset, u64 download_size) {
|
||||
SetBackingSamples(info.num_samples);
|
||||
scheduler->EndRendering();
|
||||
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eCopy,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = download_size,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eCopy,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = download_size,
|
||||
};
|
||||
const auto image_barriers =
|
||||
GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eCopy, {});
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
|
||||
.pImageMemoryBarriers = image_barriers.data(),
|
||||
});
|
||||
cmdbuf.copyImageToBuffer(GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer,
|
||||
download_copies);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
}
|
||||
|
||||
void Image::CopyImage(Image& src_image) {
|
||||
@@ -353,6 +423,9 @@ void Image::CopyImage(Image& src_image) {
|
||||
const u32 depth =
|
||||
info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth;
|
||||
|
||||
SetBackingSamples(info.num_samples, false);
|
||||
src_image.SetBackingSamples(src_info.num_samples);
|
||||
|
||||
boost::container::small_vector<vk::ImageCopy, 8> image_copies;
|
||||
for (u32 mip = 0; mip < num_mips; ++mip) {
|
||||
const auto mip_w = std::max(width >> mip, 1u);
|
||||
@@ -381,8 +454,8 @@ void Image::CopyImage(Image& src_image) {
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||
image_copies);
|
||||
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
|
||||
backing->state.layout, image_copies);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
@@ -393,6 +466,9 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
|
||||
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
|
||||
ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1);
|
||||
|
||||
SetBackingSamples(info.num_samples, false);
|
||||
src_image.SetBackingSamples(src_info.num_samples);
|
||||
|
||||
boost::container::small_vector<vk::BufferImageCopy, 8> buffer_copies;
|
||||
for (u32 mip = 0; mip < num_mips; ++mip) {
|
||||
const auto mip_w = std::max(src_info.size.width >> mip, 1u);
|
||||
@@ -445,7 +521,7 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
|
||||
.pBufferMemoryBarriers = &pre_copy_barrier,
|
||||
});
|
||||
|
||||
cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer,
|
||||
cmdbuf.copyImageToBuffer(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, buffer,
|
||||
buffer_copies);
|
||||
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
@@ -458,15 +534,11 @@ void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset)
|
||||
copy.imageSubresource.aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil;
|
||||
}
|
||||
|
||||
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, buffer_copies);
|
||||
cmdbuf.copyBufferToImage(buffer, GetImage(), vk::ImageLayout::eTransferDstOptimal,
|
||||
buffer_copies);
|
||||
}
|
||||
|
||||
void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
|
||||
void Image::CopyMip(Image& src_image, u32 mip, u32 slice) {
|
||||
const auto mip_w = std::max(info.size.width >> mip, 1u);
|
||||
const auto mip_h = std::max(info.size.height >> mip, 1u);
|
||||
const auto mip_d = std::max(info.size.depth >> mip, 1u);
|
||||
@@ -491,13 +563,166 @@ void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
|
||||
},
|
||||
.extent = {mip_w, mip_h, mip_d},
|
||||
};
|
||||
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||
image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
SetBackingSamples(info.num_samples);
|
||||
src_image.SetBackingSamples(src_info.num_samples);
|
||||
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
|
||||
const auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
|
||||
backing->state.layout, image_copy);
|
||||
}
|
||||
|
||||
Image::~Image() = default;
|
||||
/// Transfers mip 0 of `src_image` into mip 0 of this image.
///
/// This image is first switched to its single-sample backing (without copying the contents of
/// the backing it was using). If the source's current backing is single-sampled a plain image
/// copy is recorded, otherwise a multisample resolve is recorded.
///
/// @param src_image  Image to read from; transitioned to the transfer-source layout.
/// @param mrt0_range Supplies the base layer and layer count read from `src_image`.
/// @param mrt1_range Supplies the base layer and layer count written in this image.
void Image::Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range,
                    const VideoCore::SubresourceRange& mrt1_range) {
    SetBackingSamples(1, false);
    scheduler->EndRendering();

    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
                      mrt0_range);
    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, mrt1_range);

    // The copy and the resolve address exactly the same subresources, so describe them once.
    const vk::ImageSubresourceLayers src_layers = {
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .mipLevel = 0,
        .baseArrayLayer = mrt0_range.base.layer,
        .layerCount = mrt0_range.extent.layers,
    };
    const vk::ImageSubresourceLayers dst_layers = {
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .mipLevel = 0,
        .baseArrayLayer = mrt1_range.base.layer,
        .layerCount = mrt1_range.extent.layers,
    };
    const vk::Offset3D origin{0, 0, 0};
    const vk::Extent3D full_extent{info.size.width, info.size.height, 1};

    const bool src_is_multisampled = src_image.backing->num_samples != 1;
    if (src_is_multisampled) {
        const vk::ImageResolve resolve_region = {
            .srcSubresource = src_layers,
            .srcOffset = origin,
            .dstSubresource = dst_layers,
            .dstOffset = origin,
            .extent = full_extent,
        };
        scheduler->CommandBuffer().resolveImage(
            src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, GetImage(),
            vk::ImageLayout::eTransferDstOptimal, resolve_region);
    } else {
        // Source backing is already single-sampled, nothing to resolve: copy it as is.
        const vk::ImageCopy copy_region = {
            .srcSubresource = src_layers,
            .srcOffset = origin,
            .dstSubresource = dst_layers,
            .dstOffset = origin,
            .extent = full_extent,
        };
        scheduler->CommandBuffer().copyImage(
            src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, GetImage(),
            vk::ImageLayout::eTransferDstOptimal, copy_region);
    }

    // The destination now holds GPU-written data and is no longer marked dirty.
    flags |= VideoCore::ImageFlagBits::GpuModified;
    flags &= ~VideoCore::ImageFlagBits::Dirty;
}
|
||||
|
||||
/// Records a color clear of the given mip/layer range of the current backing image.
///
/// Any active rendering is ended and the image is transitioned to the transfer-destination
/// layout before the clear command is recorded.
///
/// @param clear_value Only the `color` member is used.
/// @param range       Base level/layer and level/layer counts to clear.
void Image::Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range) {
    scheduler->EndRendering();
    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});

    const vk::ImageSubresourceRange cleared_subresources = {
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .baseMipLevel = range.base.level,
        .levelCount = range.extent.levels,
        .baseArrayLayer = range.base.layer,
        .layerCount = range.extent.layers,
    };
    scheduler->CommandBuffer().clearColorImage(GetImage(), vk::ImageLayout::eTransferDstOptimal,
                                               clear_value.color, cleared_subresources);
}
|
||||
|
||||
/// Switches the image to a backing with `num_samples` samples, creating that backing on first
/// use.
///
/// Does nothing when there is no current backing or when the current backing already has the
/// requested sample count. Asserts that the image is not a depth image.
///
/// @param num_samples  Sample count of the backing to switch to.
/// @param copy_backing When true, the contents of the current backing are transferred into the
///                     target backing through the blit helper (asserts a single mip and a single
///                     layer). When false, only the active backing pointer is changed.
void Image::SetBackingSamples(u32 num_samples, bool copy_backing) {
    if (backing == nullptr || backing->num_samples == num_samples) {
        return;
    }
    ASSERT_MSG(!info.props.is_depth, "Swapping samples is only valid for color images");

    // Reuse a previously created backing with this sample count if there is one.
    const auto existing =
        std::ranges::find(backing_images, num_samples, &BackingImage::num_samples);
    BackingImage* target = existing != backing_images.end() ? std::addressof(*existing) : nullptr;

    if (target == nullptr) {
        // Clone the creation parameters of the current backing, changing only the sample count.
        vk::ImageCreateInfo target_ci = backing->image.image_ci;
        target_ci.samples = LiverpoolToVK::NumSamples(num_samples, supported_samples);

        target = &backing_images.emplace_back();
        target->num_samples = num_samples;
        target->image = UniqueImage{instance->GetDevice(), instance->GetAllocator()};
        target->image.Create(target_ci);

        Vulkan::SetObjectName(instance->GetDevice(), target->image.image,
                              "Image {}x{}x{} {} {} {:#x}:{:#x} L:{} M:{} S:{} (backing)",
                              info.size.width, info.size.height, info.size.depth,
                              AmdGpu::NameOf(info.tile_mode), vk::to_string(info.pixel_format),
                              info.guest_address, info.guest_size, info.resources.layers,
                              info.resources.levels, num_samples);
    }

    if (!copy_backing) {
        // Caller does not need the old contents, just swap the active backing.
        backing = target;
        return;
    }

    scheduler->EndRendering();
    ASSERT(info.resources.levels == 1 && info.resources.layers == 1);

    // State the target backing is left in once the copy has been recorded.
    constexpr auto target_layout = vk::ImageLayout::eColorAttachmentOptimal;
    constexpr auto target_access = vk::AccessFlagBits2::eColorAttachmentWrite;
    constexpr auto target_stage = vk::PipelineStageFlagBits2::eColorAttachmentOutput;

    // Current backing is read by the fragment shader during the copy.
    auto image_barriers =
        GetBarriers(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead,
                    vk::PipelineStageFlagBits2::eFragmentShader, std::nullopt);

    // Target backing becomes a color attachment; its old layout is declared undefined, so
    // whatever it held before is discarded.
    const vk::ImageMemoryBarrier2 target_barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eNone,
        .dstStageMask = target_stage,
        .dstAccessMask = target_access,
        .oldLayout = vk::ImageLayout::eUndefined,
        .newLayout = target_layout,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = target->image,
        .subresourceRange{
            .aspectMask = aspect_mask,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = info.resources.layers,
        },
    };
    image_barriers.push_back(target_barrier);

    const auto cmdbuf = scheduler->CommandBuffer();
    const vk::DependencyInfo dependency = {
        .imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
        .pImageMemoryBarriers = image_barriers.data(),
    };
    cmdbuf.pipelineBarrier2(dependency);

    // Transfer the contents between the multisampled and non-multisampled backings.
    const bool src_is_multisampled = backing->num_samples > 1;
    blit_helper->CopyBetweenMsImages(info.size.width, info.size.height, target->num_samples,
                                     info.pixel_format, src_is_multisampled, backing->image,
                                     target->image);

    // Record the state the barrier above put the target backing in.
    target->state.pl_stage = target_stage;
    target->state.access_mask = target_access;
    target->state.layout = target_layout;

    backing = target;
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "video_core/texture_cache/image_info.h"
|
||||
#include "video_core/texture_cache/image_view.h"
|
||||
|
||||
#include <deque>
|
||||
#include <optional>
|
||||
|
||||
namespace Vulkan {
|
||||
@@ -34,8 +35,9 @@ enum ImageFlagBits : u32 {
|
||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||
|
||||
struct UniqueImage {
|
||||
explicit UniqueImage();
|
||||
explicit UniqueImage(vk::Device device, VmaAllocator allocator);
|
||||
explicit UniqueImage() = default;
|
||||
explicit UniqueImage(vk::Device device, VmaAllocator allocator)
|
||||
: device{device}, allocator{allocator} {}
|
||||
~UniqueImage();
|
||||
|
||||
UniqueImage(const UniqueImage&) = delete;
|
||||
@@ -44,11 +46,12 @@ struct UniqueImage {
|
||||
UniqueImage(UniqueImage&& other)
|
||||
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
|
||||
image{std::exchange(other.image, VK_NULL_HANDLE)} {}
|
||||
image{std::exchange(other.image, VK_NULL_HANDLE)}, image_ci{std::move(other.image_ci)} {}
|
||||
UniqueImage& operator=(UniqueImage&& other) {
|
||||
image = std::exchange(other.image, VK_NULL_HANDLE);
|
||||
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
||||
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
|
||||
image_ci = std::move(other.image_ci);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -58,17 +61,25 @@ struct UniqueImage {
|
||||
return image;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Device device;
|
||||
VmaAllocator allocator;
|
||||
VmaAllocation allocation;
|
||||
operator bool() const {
|
||||
return image;
|
||||
}
|
||||
|
||||
public:
|
||||
vk::Device device{};
|
||||
VmaAllocator allocator{};
|
||||
VmaAllocation allocation{};
|
||||
vk::Image image{};
|
||||
vk::ImageCreateInfo image_ci{};
|
||||
};
|
||||
|
||||
constexpr Common::SlotId NULL_IMAGE_ID{0};
|
||||
|
||||
class BlitHelper;
|
||||
|
||||
struct Image {
|
||||
Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info);
|
||||
Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, BlitHelper& blit_helper,
|
||||
Common::SlotVector<ImageView>& slot_image_views, const ImageInfo& info);
|
||||
~Image();
|
||||
|
||||
Image(const Image&) = delete;
|
||||
@@ -77,94 +88,100 @@ struct Image {
|
||||
Image(Image&&) = default;
|
||||
Image& operator=(Image&&) = default;
|
||||
|
||||
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||
bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
||||
const auto image_addr = info.guest_address;
|
||||
const auto image_end = info.guest_address + info.guest_size;
|
||||
return image_addr < overlap_end && overlap_cpu_addr < image_end;
|
||||
}
|
||||
|
||||
ImageViewId FindView(const ImageViewInfo& info) const {
|
||||
const auto it = std::ranges::find(image_view_infos, info);
|
||||
if (it == image_view_infos.end()) {
|
||||
return {};
|
||||
}
|
||||
return image_view_ids[std::distance(image_view_infos.begin(), it)];
|
||||
vk::Image GetImage() const {
|
||||
return backing->image.image;
|
||||
}
|
||||
|
||||
void AssociateDepth(ImageId image_id) {
|
||||
depth_id = image_id;
|
||||
}
|
||||
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
|
||||
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
|
||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
||||
void Upload(vk::Buffer buffer, u64 offset);
|
||||
|
||||
void CopyImage(Image& src_image);
|
||||
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
|
||||
void CopyMip(const Image& src_image, u32 mip, u32 slice);
|
||||
|
||||
bool IsTracked() {
|
||||
return track_addr != 0 && track_addr_end != 0;
|
||||
}
|
||||
|
||||
bool SafeToDownload() const {
|
||||
return True(flags & ImageFlagBits::GpuModified) &&
|
||||
False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty));
|
||||
return True(flags & ImageFlagBits::GpuModified) && False(flags & (ImageFlagBits::Dirty));
|
||||
}
|
||||
|
||||
void AssociateDepth(ImageId image_id) {
|
||||
depth_id = image_id;
|
||||
}
|
||||
|
||||
ImageView& FindView(const ImageViewInfo& view_info, bool ensure_guest_samples = true);
|
||||
|
||||
using Barriers = boost::container::small_vector<vk::ImageMemoryBarrier2, 32>;
|
||||
Barriers GetBarriers(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage,
|
||||
std::optional<SubresourceRange> subres_range);
|
||||
void Transit(vk::ImageLayout dst_layout, vk::AccessFlags2 dst_mask,
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
||||
void Upload(std::span<const vk::BufferImageCopy> upload_copies, vk::Buffer buffer, u64 offset);
|
||||
void Download(std::span<const vk::BufferImageCopy> download_copies, vk::Buffer buffer,
|
||||
u64 offset, u64 download_size);
|
||||
|
||||
void CopyImage(Image& src_image);
|
||||
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
|
||||
void CopyMip(Image& src_image, u32 mip, u32 slice);
|
||||
|
||||
void Resolve(Image& src_image, const VideoCore::SubresourceRange& mrt0_range,
|
||||
const VideoCore::SubresourceRange& mrt1_range);
|
||||
void Clear(const vk::ClearValue& clear_value, const VideoCore::SubresourceRange& range);
|
||||
|
||||
void SetBackingSamples(u32 num_samples, bool copy_backing = true);
|
||||
|
||||
public:
|
||||
const Vulkan::Instance* instance;
|
||||
Vulkan::Scheduler* scheduler;
|
||||
BlitHelper* blit_helper;
|
||||
Common::SlotVector<ImageView>* slot_image_views;
|
||||
ImageInfo info;
|
||||
UniqueImage image;
|
||||
vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor;
|
||||
vk::SampleCountFlags supported_samples = vk::SampleCountFlagBits::e1;
|
||||
ImageFlagBits flags = ImageFlagBits::Dirty;
|
||||
VAddr track_addr = 0;
|
||||
VAddr track_addr_end = 0;
|
||||
std::vector<ImageViewInfo> image_view_infos;
|
||||
std::vector<ImageViewId> image_view_ids;
|
||||
ImageId depth_id{};
|
||||
u64 lru_id{};
|
||||
|
||||
// Resource state tracking
|
||||
vk::ImageUsageFlags usage_flags;
|
||||
vk::FormatFeatureFlags2 format_features;
|
||||
struct State {
|
||||
vk::PipelineStageFlags2 pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
|
||||
vk::AccessFlags2 access_mask = vk::AccessFlagBits2::eNone;
|
||||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
};
|
||||
struct BackingImage {
|
||||
UniqueImage image;
|
||||
State state;
|
||||
std::vector<State> subresource_states;
|
||||
boost::container::small_vector<ImageViewInfo, 4> image_view_infos;
|
||||
boost::container::small_vector<ImageViewId, 4> image_view_ids;
|
||||
u32 num_samples;
|
||||
};
|
||||
std::deque<BackingImage> backing_images;
|
||||
BackingImage* backing{};
|
||||
boost::container::static_vector<u64, 16> mip_hashes{};
|
||||
u64 lru_id{};
|
||||
u64 tick_accessed_last{};
|
||||
u64 hash{};
|
||||
|
||||
struct {
|
||||
u32 texture : 1;
|
||||
u32 storage : 1;
|
||||
u32 render_target : 1;
|
||||
u32 depth_target : 1;
|
||||
u32 stencil : 1;
|
||||
u32 vo_surface : 1;
|
||||
} usage{};
|
||||
vk::ImageUsageFlags usage_flags;
|
||||
vk::FormatFeatureFlags2 format_features;
|
||||
struct State {
|
||||
vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
|
||||
vk::Flags<vk::AccessFlagBits2> access_mask = vk::AccessFlagBits2::eNone;
|
||||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
};
|
||||
State last_state{};
|
||||
std::vector<State> subresource_states{};
|
||||
boost::container::small_vector<u64, 14> mip_hashes{};
|
||||
u64 tick_accessed_last{0};
|
||||
u64 hash{0};
|
||||
|
||||
struct {
|
||||
union {
|
||||
struct {
|
||||
u32 is_bound : 1; // the image is bound to a descriptor set
|
||||
u32 is_target : 1; // the image is bound as color/depth target
|
||||
u32 needs_rebind : 1; // the image needs to be rebound
|
||||
u32 force_general : 1; // the image needs to be used in general layout
|
||||
};
|
||||
u32 raw{};
|
||||
};
|
||||
|
||||
void Reset() {
|
||||
raw = 0u;
|
||||
}
|
||||
u32 is_bound : 1;
|
||||
u32 is_target : 1;
|
||||
u32 needs_rebind : 1;
|
||||
u32 force_general : 1;
|
||||
} binding{};
|
||||
};
|
||||
|
||||
|
||||
@@ -90,9 +90,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
||||
type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D;
|
||||
}
|
||||
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||
ImageId image_id_)
|
||||
: image_id{image_id_}, info{info_} {
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_,
|
||||
const Image& image)
|
||||
: info{info_} {
|
||||
vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage_flags};
|
||||
if (!info.is_storage) {
|
||||
usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage;
|
||||
@@ -113,7 +113,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||
|
||||
const vk::ImageViewCreateInfo image_view_ci = {
|
||||
.pNext = &usage_ci,
|
||||
.image = image.image,
|
||||
.image = image.GetImage(),
|
||||
.viewType = ConvertImageViewType(info.type),
|
||||
.format = instance.GetSupportedFormat(format, image.format_features),
|
||||
.components = info.mapping,
|
||||
|
||||
@@ -35,8 +35,7 @@ struct ImageViewInfo {
|
||||
struct Image;
|
||||
|
||||
struct ImageView {
|
||||
ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
|
||||
ImageId image_id);
|
||||
ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, const Image& image);
|
||||
~ImageView();
|
||||
|
||||
ImageView(const ImageView&) = delete;
|
||||
@@ -45,7 +44,6 @@ struct ImageView {
|
||||
ImageView(ImageView&&) = default;
|
||||
ImageView& operator=(ImageView&&) = default;
|
||||
|
||||
ImageId image_id;
|
||||
ImageViewInfo info;
|
||||
vk::UniqueImageView image_view;
|
||||
};
|
||||
|
||||
@@ -73,16 +73,15 @@ ImageId TextureCache::GetNullImage(const vk::Format format) {
|
||||
info.num_bits = 32;
|
||||
info.UpdateSize();
|
||||
|
||||
const ImageId null_id = slot_images.insert(instance, scheduler, info);
|
||||
auto& img = slot_images[null_id];
|
||||
|
||||
const vk::Image& null_image = img.image;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_image,
|
||||
const ImageId null_id =
|
||||
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
|
||||
auto& image = slot_images[null_id];
|
||||
Vulkan::SetObjectName(instance.GetDevice(), image.GetImage(),
|
||||
fmt::format("Null Image ({})", vk::to_string(format)));
|
||||
|
||||
img.flags = ImageFlagBits::Empty;
|
||||
img.track_addr = img.info.guest_address;
|
||||
img.track_addr_end = img.info.guest_address + img.info.guest_size;
|
||||
image.flags = ImageFlagBits::Empty;
|
||||
image.track_addr = image.info.guest_address;
|
||||
image.track_addr_end = image.info.guest_address + image.info.guest_size;
|
||||
|
||||
null_images.emplace(format, null_id);
|
||||
return null_id;
|
||||
@@ -124,7 +123,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) {
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal,
|
||||
download_buffer.Handle(), image_download);
|
||||
|
||||
{
|
||||
@@ -269,7 +268,8 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
|
||||
if (recreate) {
|
||||
auto new_info = requested_info;
|
||||
new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
|
||||
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
|
||||
const auto new_image_id =
|
||||
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, new_info);
|
||||
RegisterImage(new_image_id);
|
||||
|
||||
// Inherit image usage
|
||||
@@ -290,7 +290,14 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
|
||||
} else if (cache_image.info.num_samples == 1 && new_info.props.is_depth &&
|
||||
new_info.num_samples > 1) {
|
||||
// Perform a rendering pass to transfer the channels of source as samples in dest.
|
||||
blit_helper.BlitColorToMsDepth(cache_image, new_image);
|
||||
cache_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal,
|
||||
vk::AccessFlagBits2::eShaderRead, {});
|
||||
new_image.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});
|
||||
blit_helper.ReinterpretColorAsMsDepth(
|
||||
new_info.size.width, new_info.size.height, new_info.num_samples,
|
||||
cache_image.info.pixel_format, new_info.pixel_format, cache_image.GetImage(),
|
||||
new_image.GetImage());
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Unimplemented depth overlap copy");
|
||||
}
|
||||
@@ -308,15 +315,16 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
BindingType binding,
|
||||
ImageId cache_image_id,
|
||||
ImageId merged_image_id) {
|
||||
auto& tex_cache_image = slot_images[cache_image_id];
|
||||
// We can assume it is safe to delete the image if it wasn't accessed in some number of frames.
|
||||
auto& cache_image = slot_images[cache_image_id];
|
||||
const bool safe_to_delete =
|
||||
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
|
||||
scheduler.CurrentTick() - cache_image.tick_accessed_last > NumFramesBeforeRemoval;
|
||||
|
||||
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
|
||||
if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
|
||||
image_info.num_bits * image_info.num_samples !=
|
||||
tex_cache_image.info.num_bits * tex_cache_image.info.num_samples) {
|
||||
// Equal address
|
||||
if (image_info.guest_address == cache_image.info.guest_address) {
|
||||
const u32 lhs_block_size = image_info.num_bits * image_info.num_samples;
|
||||
const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples;
|
||||
if (image_info.BlockDim() != cache_image.info.BlockDim() ||
|
||||
lhs_block_size != rhs_block_size) {
|
||||
// Very likely this kind of overlap is caused by allocation from a pool.
|
||||
if (safe_to_delete) {
|
||||
FreeImage(cache_image_id);
|
||||
@@ -329,19 +337,19 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
}
|
||||
|
||||
// Compressed view of uncompressed image with same block size.
|
||||
if (image_info.props.is_block && !tex_cache_image.info.props.is_block) {
|
||||
if (image_info.props.is_block && !cache_image.info.props.is_block) {
|
||||
return {ExpandImage(image_info, cache_image_id), -1, -1};
|
||||
}
|
||||
|
||||
if (image_info.guest_size == tex_cache_image.info.guest_size &&
|
||||
if (image_info.guest_size == cache_image.info.guest_size &&
|
||||
(image_info.type == AmdGpu::ImageType::Color3D ||
|
||||
tex_cache_image.info.type == AmdGpu::ImageType::Color3D)) {
|
||||
cache_image.info.type == AmdGpu::ImageType::Color3D)) {
|
||||
return {ExpandImage(image_info, cache_image_id), -1, -1};
|
||||
}
|
||||
|
||||
// Size and resources are less than or equal, use image view.
|
||||
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
|
||||
image_info.guest_size <= tex_cache_image.info.guest_size) {
|
||||
if (image_info.pixel_format != cache_image.info.pixel_format ||
|
||||
image_info.guest_size <= cache_image.info.guest_size) {
|
||||
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
|
||||
const auto& result_image = slot_images[result_id];
|
||||
const bool is_compatible =
|
||||
@@ -350,14 +358,14 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
}
|
||||
|
||||
// Size and resources are greater, expand the image.
|
||||
if (image_info.type == tex_cache_image.info.type &&
|
||||
image_info.resources > tex_cache_image.info.resources) {
|
||||
if (image_info.type == cache_image.info.type &&
|
||||
image_info.resources > cache_image.info.resources) {
|
||||
return {ExpandImage(image_info, cache_image_id), -1, -1};
|
||||
}
|
||||
|
||||
// Size is greater but resources are not, because the tiling mode is different.
|
||||
// Likely the address is reused for a image with a different tiling mode.
|
||||
if (image_info.tile_mode != tex_cache_image.info.tile_mode) {
|
||||
if (image_info.tile_mode != cache_image.info.tile_mode) {
|
||||
if (safe_to_delete) {
|
||||
FreeImage(cache_image_id);
|
||||
}
|
||||
@@ -368,9 +376,9 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
}
|
||||
|
||||
// Right overlap, the image requested is a possible subresource of the image from cache.
|
||||
if (image_info.guest_address > tex_cache_image.info.guest_address) {
|
||||
if (auto mip = image_info.MipOf(tex_cache_image.info); mip >= 0) {
|
||||
if (auto slice = image_info.SliceOf(tex_cache_image.info, mip); slice >= 0) {
|
||||
if (image_info.guest_address > cache_image.info.guest_address) {
|
||||
if (auto mip = image_info.MipOf(cache_image.info); mip >= 0) {
|
||||
if (auto slice = image_info.SliceOf(cache_image.info, mip); slice >= 0) {
|
||||
return {cache_image_id, mip, slice};
|
||||
}
|
||||
}
|
||||
@@ -383,12 +391,12 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
return {{}, -1, -1};
|
||||
} else {
|
||||
// Left overlap, the image from cache is a possible subresource of the image requested
|
||||
if (auto mip = tex_cache_image.info.MipOf(image_info); mip >= 0) {
|
||||
if (auto slice = tex_cache_image.info.SliceOf(image_info, mip); slice >= 0) {
|
||||
if (auto mip = cache_image.info.MipOf(image_info); mip >= 0) {
|
||||
if (auto slice = cache_image.info.SliceOf(image_info, mip); slice >= 0) {
|
||||
// We have a larger image created and a separate one, representing a subres of it
|
||||
// bound as render target. In this case we need to rebind render target.
|
||||
if (tex_cache_image.binding.is_target) {
|
||||
tex_cache_image.binding.needs_rebind = 1u;
|
||||
if (cache_image.binding.is_target) {
|
||||
cache_image.binding.needs_rebind = 1u;
|
||||
if (merged_image_id) {
|
||||
GetImage(merged_image_id).binding.is_target = 1u;
|
||||
}
|
||||
@@ -399,15 +407,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
|
||||
// We need to have a larger, already allocated image to copy this one into
|
||||
if (merged_image_id) {
|
||||
tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
|
||||
vk::AccessFlagBits2::eTransferRead, {});
|
||||
|
||||
const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
|
||||
ASSERT(num_mips_to_copy == 1);
|
||||
|
||||
auto& merged_image = slot_images[merged_image_id];
|
||||
merged_image.CopyMip(tex_cache_image, mip, slice);
|
||||
|
||||
merged_image.CopyMip(cache_image, mip, slice);
|
||||
FreeImage(cache_image_id);
|
||||
}
|
||||
}
|
||||
@@ -418,7 +419,8 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
}
|
||||
|
||||
ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
||||
const auto new_image_id = slot_images.insert(instance, scheduler, info);
|
||||
const auto new_image_id =
|
||||
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
|
||||
RegisterImage(new_image_id);
|
||||
|
||||
auto& src_image = slot_images[image_id];
|
||||
@@ -507,7 +509,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) {
|
||||
}
|
||||
// Create and register a new image
|
||||
if (!image_id) {
|
||||
image_id = slot_images.insert(instance, scheduler, info);
|
||||
image_id = slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
|
||||
RegisterImage(image_id);
|
||||
}
|
||||
|
||||
@@ -557,18 +559,6 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure
|
||||
return {};
|
||||
}
|
||||
|
||||
ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo& view_info) {
|
||||
Image& image = slot_images[image_id];
|
||||
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
|
||||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, image_id);
|
||||
image.image_view_infos.emplace_back(view_info);
|
||||
image.image_view_ids.emplace_back(view_id);
|
||||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
|
||||
Image& image = slot_images[image_id];
|
||||
if (desc.type == BindingType::Storage) {
|
||||
@@ -579,11 +569,10 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
|
||||
}
|
||||
}
|
||||
UpdateImage(image_id);
|
||||
return RegisterImageView(image_id, desc.view_info);
|
||||
return image.FindView(desc.view_info);
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
|
||||
const ImageId image_id = FindImage(desc);
|
||||
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) {
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
image.usage.render_target = 1u;
|
||||
@@ -602,15 +591,13 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
|
||||
image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
|
||||
}
|
||||
|
||||
return RegisterImageView(image_id, desc.view_info);
|
||||
return image.FindView(desc.view_info, false);
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||
const ImageId image_id = FindImage(desc);
|
||||
ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) {
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
image.usage.depth_target = 1u;
|
||||
image.usage.stencil = image.info.props.has_stencil;
|
||||
UpdateImage(image_id);
|
||||
|
||||
// Register meta data for this depth buffer
|
||||
@@ -635,7 +622,8 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||
info.guest_address = desc.info.stencil_addr;
|
||||
info.guest_size = desc.info.stencil_size;
|
||||
info.size = desc.info.size;
|
||||
stencil_id = slot_images.insert(instance, scheduler, info);
|
||||
stencil_id =
|
||||
slot_images.insert(instance, scheduler, blit_helper, slot_image_views, info);
|
||||
RegisterImage(stencil_id);
|
||||
}
|
||||
Image& image = slot_images[stencil_id];
|
||||
@@ -643,10 +631,10 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||
image.AssociateDepth(image_id);
|
||||
}
|
||||
|
||||
return RegisterImageView(image_id, desc.view_info);
|
||||
return image.FindView(desc.view_info, false);
|
||||
}
|
||||
|
||||
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
|
||||
void TextureCache::RefreshImage(Image& image) {
|
||||
if (False(image.flags & ImageFlagBits::Dirty) || image.info.num_samples > 1) {
|
||||
return;
|
||||
}
|
||||
@@ -678,7 +666,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
|
||||
const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
|
||||
|
||||
boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
|
||||
boost::container::small_vector<vk::BufferImageCopy, 14> image_copies;
|
||||
for (u32 m = 0; m < num_mips; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
@@ -698,7 +686,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
|
||||
const u32 extent_width = mip_pitch ? std::min(mip_pitch, width) : width;
|
||||
const u32 extent_height = mip_height ? std::min(mip_height, height) : height;
|
||||
image_copy.push_back({
|
||||
image_copies.push_back({
|
||||
.bufferOffset = mip_offset,
|
||||
.bufferRowLength = mip_pitch,
|
||||
.bufferImageHeight = mip_height,
|
||||
@@ -713,21 +701,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
});
|
||||
}
|
||||
|
||||
if (image_copy.empty()) {
|
||||
if (image_copies.empty()) {
|
||||
image.flags &= ~ImageFlagBits::Dirty;
|
||||
return;
|
||||
}
|
||||
|
||||
auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
|
||||
sched_ptr->EndRendering();
|
||||
scheduler.EndRendering();
|
||||
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size;
|
||||
const auto [in_buffer, in_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size);
|
||||
const auto [in_buffer, in_offset] =
|
||||
buffer_cache.ObtainBufferForImage(image.info.guest_address, image.info.guest_size);
|
||||
if (auto barrier = in_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
scheduler.CommandBuffer().pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &barrier.value(),
|
||||
@@ -735,48 +720,12 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
}
|
||||
|
||||
const auto [buffer, offset] =
|
||||
!custom_scheduler ? tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info)
|
||||
: std::make_pair(in_buffer->Handle(), in_offset);
|
||||
for (auto& copy : image_copy) {
|
||||
tile_manager.DetileImage(in_buffer->Handle(), in_offset, image.info);
|
||||
for (auto& copy : image_copies) {
|
||||
copy.bufferOffset += offset;
|
||||
}
|
||||
|
||||
const vk::BufferMemoryBarrier2 pre_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = image_size,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.size = image_size,
|
||||
};
|
||||
const auto image_barriers =
|
||||
image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||
vk::PipelineStageFlagBits2::eTransfer, {});
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
|
||||
.pImageMemoryBarriers = image_barriers.data(),
|
||||
});
|
||||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
image.flags &= ~ImageFlagBits::Dirty;
|
||||
image.Upload(image_copies, buffer, offset);
|
||||
}
|
||||
|
||||
vk::Sampler TextureCache::GetSampler(
|
||||
@@ -1020,8 +969,10 @@ void TextureCache::DeleteImage(ImageId image_id) {
|
||||
// Reclaim image and any image views it references.
|
||||
scheduler.DeferOperation([this, image_id] {
|
||||
Image& image = slot_images[image_id];
|
||||
for (const ImageViewId image_view_id : image.image_view_ids) {
|
||||
slot_image_views.erase(image_view_id);
|
||||
for (auto& backing : image.backing_images) {
|
||||
for (const ImageViewId image_view_id : backing.image_view_ids) {
|
||||
slot_image_views.erase(image_view_id);
|
||||
}
|
||||
}
|
||||
slot_images.erase(image_id);
|
||||
});
|
||||
|
||||
@@ -67,12 +67,14 @@ public:
|
||||
};
|
||||
|
||||
/// Descriptor for a color render target: a BaseDesc whose binding type is
/// BindingType::RenderTarget.
struct RenderTargetDesc : public BaseDesc {
|
||||
/// Default constructor; leaves the BaseDesc default-initialized.
RenderTargetDesc() = default;
|
||||
/// Builds the descriptor from a color buffer: the image info is constructed from `buffer`
/// and `hint` (which defaults to an empty extent), the view info from `buffer` alone.
RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint = {})
|
||||
: BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {}
|
||||
};
|
||||
|
||||
struct DepthTargetDesc : public BaseDesc {
|
||||
DepthTargetDesc() = default;
|
||||
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
||||
const AmdGpu::Liverpool::DepthView& view,
|
||||
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
|
||||
@@ -118,20 +120,21 @@ public:
|
||||
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc);
|
||||
|
||||
/// Retrieves the render target with specified properties
|
||||
[[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc);
|
||||
[[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc);
|
||||
|
||||
/// Retrieves the depth target with specified properties
|
||||
[[nodiscard]] ImageView& FindDepthTarget(BaseDesc& desc);
|
||||
[[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc);
|
||||
|
||||
/// Updates image contents if it was modified by CPU.
|
||||
void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
|
||||
void UpdateImage(ImageId image_id) {
|
||||
std::scoped_lock lock{mutex};
|
||||
Image& image = slot_images[image_id];
|
||||
TrackImage(image_id);
|
||||
TouchImage(image);
|
||||
RefreshImage(image, custom_scheduler);
|
||||
RefreshImage(image);
|
||||
}
|
||||
|
||||
/// Resolves overlap between existing cache image and pending merged image
|
||||
[[nodiscard]] std::tuple<ImageId, int, int> ResolveOverlap(const ImageInfo& info,
|
||||
BindingType binding,
|
||||
ImageId cache_img_id,
|
||||
@@ -145,7 +148,7 @@ public:
|
||||
[[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id);
|
||||
|
||||
/// Reuploads image contents.
|
||||
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
|
||||
void RefreshImage(Image& image);
|
||||
|
||||
/// Retrieves the sampler that matches the provided S# descriptor.
|
||||
[[nodiscard]] vk::Sampler GetSampler(
|
||||
@@ -161,16 +164,9 @@ public:
|
||||
|
||||
/// Retrieves the image view with the specified id.
|
||||
[[nodiscard]] ImageView& GetImageView(ImageId id) {
|
||||
auto& view = slot_image_views[id];
|
||||
// Maybe this is not needed.
|
||||
Image& image = slot_images[view.image_id];
|
||||
TouchImage(image);
|
||||
return view;
|
||||
return slot_image_views[id];
|
||||
}
|
||||
|
||||
/// Registers an image view for provided image
|
||||
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
|
||||
|
||||
/// Returns true if the specified address is a metadata surface.
|
||||
bool IsMeta(VAddr address) const {
|
||||
return surface_metas.contains(address);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
#include "video_core/texture_cache/image_info.h"
|
||||
#include "video_core/texture_cache/image_view.h"
|
||||
#include "video_core/texture_cache/tile_manager.h"
|
||||
@@ -190,6 +191,8 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset
|
||||
vmaDestroyBuffer(instance.GetAllocator(), out_buffer, out_allocation);
|
||||
});
|
||||
|
||||
scheduler.EndRendering();
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, false));
|
||||
|
||||
@@ -238,15 +241,14 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset
|
||||
return {out_buffer, 0};
|
||||
}
|
||||
|
||||
void TileManager::TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> buffer_copies,
|
||||
vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info) {
|
||||
void TileManager::TileImage(Image& in_image, std::span<vk::BufferImageCopy> buffer_copies,
|
||||
vk::Buffer out_buffer, u32 out_offset, u32 copy_size) {
|
||||
const auto& info = in_image.info;
|
||||
if (!info.props.is_tiled) {
|
||||
for (auto& copy : buffer_copies) {
|
||||
copy.bufferOffset += out_offset;
|
||||
}
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, out_buffer,
|
||||
buffer_copies);
|
||||
in_image.Download(buffer_copies, out_buffer, out_offset, copy_size);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -275,8 +277,8 @@ void TileManager::TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> b
|
||||
});
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(in_image, vk::ImageLayout::eTransferSrcOptimal, temp_buffer,
|
||||
buffer_copies);
|
||||
in_image.Download(buffer_copies, temp_buffer, 0, copy_size);
|
||||
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, true));
|
||||
|
||||
const vk::DescriptorBufferInfo tiled_buffer_info{
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
namespace VideoCore {
|
||||
|
||||
struct ImageInfo;
|
||||
struct Image;
|
||||
class StreamBuffer;
|
||||
|
||||
class TileManager {
|
||||
@@ -23,8 +24,8 @@ public:
|
||||
StreamBuffer& stream_buffer);
|
||||
~TileManager();
|
||||
|
||||
void TileImage(vk::Image in_image, std::span<vk::BufferImageCopy> buffer_copies,
|
||||
vk::Buffer out_buffer, u32 out_offset, const ImageInfo& info);
|
||||
void TileImage(Image& in_image, std::span<vk::BufferImageCopy> buffer_copies,
|
||||
vk::Buffer out_buffer, u32 out_offset, u32 copy_size);
|
||||
|
||||
Result DetileImage(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user