diff --git a/src/common/number_utils.cpp b/src/common/number_utils.cpp
index af29e5cd3..660f539f9 100644
--- a/src/common/number_utils.cpp
+++ b/src/common/number_utils.cpp
@@ -158,4 +158,4 @@ float S16ToSnorm(s16 val) {
     return float(val * c);
 }
 
-} // namespace NumberUtils
\ No newline at end of file
+} // namespace NumberUtils
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 98f2195c5..a5cdca8f1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -305,19 +305,23 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
             runtime_info.fs_info.addr_flags.persp_sample_ena) {
             ctx.AddCapability(spv::Capability::SampleRateShading);
         }
+        if (info.loads.GetAny(IR::Attribute::RenderTargetIndex)) {
+            ctx.AddCapability(spv::Capability::Geometry);
+        }
     }
     if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
         ctx.AddCapability(spv::Capability::Tessellation);
     }
     if (stage == LogicalStage::Vertex || stage == LogicalStage::TessellationControl ||
         stage == LogicalStage::TessellationEval) {
-        if (info.has_layer_output) {
+        if (info.stores.GetAny(IR::Attribute::RenderTargetIndex)) {
             ctx.AddCapability(spv::Capability::ShaderLayer);
         }
-        if (info.has_viewport_index_output) {
+        if (info.stores.GetAny(IR::Attribute::ViewportIndex)) {
            ctx.AddCapability(spv::Capability::ShaderViewportIndex);
         }
-    } else if (stage == LogicalStage::Geometry && info.has_viewport_index_output) {
+    } else if (stage == LogicalStage::Geometry &&
+               info.stores.GetAny(IR::Attribute::ViewportIndex)) {
         ctx.AddCapability(spv::Capability::MultiViewport);
     }
     if (info.uses_dma) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 6df8f74fd..554448b13 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -3,7 +3,6 @@
 
 #include "common/assert.h"
 #include "common/config.h"
-#include "common/logging/log.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
@@ -14,55 +13,11 @@
 #include
 
 namespace Shader::Backend::SPIRV {
-namespace {
-Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
-    if (IR::IsParam(attr)) {
-        const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
-        if (ctx.stage == Stage::Local) {
-            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
-            return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
-                                     ctx.ConstU32(element));
-        } else {
-            const auto& info{ctx.output_params.at(attr_index)};
-            ASSERT(info.num_components > 0);
-            if (info.num_components == 1) {
-                return info.id;
-            } else {
-                return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
-            }
-        }
-    }
-    if (IR::IsMrt(attr)) {
-        const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
-        const auto& info{ctx.frag_outputs.at(index)};
-        if (info.num_components == 1) {
-            return info.id;
-        } else {
-            return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
-        }
-    }
-    switch (attr) {
-    case IR::Attribute::Position0:
-        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
-    case IR::Attribute::ClipDistance:
-        return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, ctx.ConstU32(element));
-    case IR::Attribute::CullDistance:
-        return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, ctx.ConstU32(element));
-    case IR::Attribute::PointSize:
-        return ctx.output_point_size;
-    case IR::Attribute::RenderTargetIndex:
-        return ctx.output_layer;
-    case IR::Attribute::ViewportIndex:
-        return ctx.output_viewport_index;
-    case IR::Attribute::Depth:
-        return ctx.frag_depth;
-    default:
-        UNREACHABLE_MSG("Write attribute {}", attr);
-    }
-}
+
+using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
 
-std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
+static std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
     if (IR::IsParam(attr)) {
         const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
         const auto& info{ctx.output_params.at(index)};
@@ -82,15 +37,13 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
         return {ctx.F32[1], false};
     case IR::Attribute::RenderTargetIndex:
     case IR::Attribute::ViewportIndex:
-        return {ctx.S32[1], true};
+    case IR::Attribute::SampleMask:
+    case IR::Attribute::StencilRef:
+        return {ctx.U32[1], true};
     default:
         UNREACHABLE_MSG("Write attribute {}", attr);
     }
 }
-} // Anonymous namespace
-
-using PointerType = EmitContext::PointerType;
-using PointerSize = EmitContext::PointerSize;
 
 Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
     const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
@@ -212,6 +165,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
     case IR::Attribute::IsFrontFace:
         return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
                             ctx.u32_zero_value);
+    case IR::Attribute::SampleIndex:
+        return ctx.OpLoad(ctx.U32[1], ctx.sample_index);
+    case IR::Attribute::RenderTargetIndex:
+        return ctx.OpLoad(ctx.U32[1], ctx.output_layer);
     case IR::Attribute::PrimitiveId:
         return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
     case IR::Attribute::InvocationId:
@@ -243,12 +200,62 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
 }
 
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
-    const Id pointer{OutputAttrPointer(ctx, attr, element)};
-    const auto [component_type, is_integer]{OutputAttrComponentType(ctx, attr)};
-    if (is_integer) {
-        ctx.OpStore(pointer, ctx.OpBitcast(component_type, value));
-    } else {
-        ctx.OpStore(pointer, value);
+    const auto op_store = [&](Id pointer) {
+        const auto [component_type, is_integer] = OutputAttrComponentType(ctx, attr);
+        if (is_integer) {
+            ctx.OpStore(pointer, ctx.OpBitcast(component_type, value));
+        } else {
+            ctx.OpStore(pointer, value);
+        }
+    };
+    if (IR::IsParam(attr)) {
+        const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
+        if (ctx.stage == Stage::Local) {
+            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
+            return op_store(ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
+                                              ctx.ConstU32(attr_index), ctx.ConstU32(element)));
+        } else {
+            const auto& info{ctx.output_params.at(attr_index)};
+            ASSERT(info.num_components > 0);
+            if (info.num_components == 1) {
+                return op_store(info.id);
+            } else {
+                return op_store(
+                    ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)));
+            }
+        }
+    }
+    if (IR::IsMrt(attr)) {
+        const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
+        const auto& info{ctx.frag_outputs.at(index)};
+        if (info.num_components == 1) {
+            return op_store(info.id);
+        } else {
+            return op_store(ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)));
+        }
+    }
+    switch (attr) {
+    case IR::Attribute::Position0:
+        return op_store(
+            ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element)));
+    case IR::Attribute::ClipDistance:
+        return op_store(
+            ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, ctx.ConstU32(element)));
+    case IR::Attribute::CullDistance:
+        return op_store(
+            ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, ctx.ConstU32(element)));
+    case IR::Attribute::PointSize:
+        return op_store(ctx.output_point_size);
+    case IR::Attribute::RenderTargetIndex:
+        return op_store(ctx.output_layer);
+    case IR::Attribute::ViewportIndex:
+        return op_store(ctx.output_viewport_index);
+    case IR::Attribute::Depth:
+        return op_store(ctx.frag_depth);
+    case IR::Attribute::SampleMask:
+        return op_store(ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value));
+    default:
+        UNREACHABLE_MSG("Write attribute {}", attr);
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
index 440f80fa9..804d98b74 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -28,7 +28,7 @@ void ConvertDepthMode(EmitContext& ctx) {
 }
 
 void ConvertPositionToClipSpace(EmitContext& ctx) {
-    ASSERT_MSG(!ctx.info.has_viewport_index_output,
+    ASSERT_MSG(!ctx.info.stores.GetAny(IR::Attribute::ViewportIndex),
               "Multi-viewport with shader clip space conversion not yet implemented.");
 
     const Id type{ctx.F32[1]};
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 131b475fc..4152420d0 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -370,13 +370,18 @@ void EmitContext::DefineInputs() {
         if (info.loads.GetAny(IR::Attribute::FragCoord)) {
             frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
         }
-        if (info.stores.Get(IR::Attribute::Depth)) {
-            frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
-        }
         if (info.loads.Get(IR::Attribute::IsFrontFace)) {
             front_facing =
                 DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
         }
+        if (info.loads.GetAny(IR::Attribute::RenderTargetIndex)) {
+            output_layer = DefineVariable(U32[1], spv::BuiltIn::Layer, spv::StorageClass::Input);
+            Decorate(output_layer, spv::Decoration::Flat);
+        }
+        if (info.loads.Get(IR::Attribute::SampleIndex)) {
+            sample_index = DefineVariable(U32[1], spv::BuiltIn::SampleId, spv::StorageClass::Input);
+            Decorate(sample_index, spv::Decoration::Flat);
+        }
         if (info.loads.GetAny(IR::Attribute::BaryCoordSmooth)) {
             if (profile.supports_amd_shader_explicit_vertex_parameter) {
                 bary_coord_smooth = DefineVariable(F32[2], spv::BuiltIn::BaryCoordSmoothAMD,
@@ -560,11 +565,11 @@ void EmitContext::DefineVertexBlock() {
             DefineVariable(F32[1], spv::BuiltIn::PointSize, spv::StorageClass::Output);
     }
     if (info.stores.GetAny(IR::Attribute::RenderTargetIndex)) {
-        output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
+        output_layer = DefineVariable(U32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
     }
     if (info.stores.GetAny(IR::Attribute::ViewportIndex)) {
         output_viewport_index =
-            DefineVariable(S32[1], spv::BuiltIn::ViewportIndex, spv::StorageClass::Output);
+            DefineVariable(U32[1], spv::BuiltIn::ViewportIndex, spv::StorageClass::Output);
     }
 }
 
@@ -646,6 +651,13 @@ void EmitContext::DefineOutputs() {
         break;
     }
     case LogicalStage::Fragment: {
+        if (info.stores.Get(IR::Attribute::Depth)) {
+            frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
+        }
+        if (info.stores.Get(IR::Attribute::SampleMask)) {
+            sample_mask = DefineVariable(TypeArray(U32[1], u32_one_value), spv::BuiltIn::SampleMask,
+                                         spv::StorageClass::Output);
+        }
         u32 num_render_targets = 0;
         for (u32 i = 0; i < IR::NumRenderTargets; i++) {
             const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
@@ -1080,36 +1092,26 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie
     Name(func, name);
     AddLabel();
 
-    const auto raw_mantissa{
-        OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
-    const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
-    const auto exponent{OpBitcast(
-        S32[1], OpBitFieldSExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U)))};
-
-    const auto is_exp_neg_one{OpIEqual(U1[1], exponent, ConstS32(-1))};
-    const auto is_exp_zero{OpIEqual(U1[1], exponent, ConstS32(0))};
-
-    const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
-    const auto is_nan{
-        OpLogicalAnd(U1[1], is_exp_neg_one, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
-    const auto is_inf{
-        OpLogicalAnd(U1[1], is_exp_neg_one, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
-    const auto is_denorm{
-        OpLogicalAnd(U1[1], is_exp_zero, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
-
-    const auto denorm{OpFMul(F32[1], mantissa, ConstF32(1.f / (1 << 20)))};
-    const auto norm{OpLdexp(
-        F32[1],
-        OpFAdd(F32[1],
-               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<f32>(1 << mantissa_bits))),
-               ConstF32(1.f)),
-        exponent)};
-
-    const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
-                               OpSelect(F32[1], is_nan, ConstF32(NAN),
-                                        OpSelect(F32[1], is_inf, ConstF32(INFINITY),
-                                                 OpSelect(F32[1], is_denorm, denorm, norm))))};
-
+    const Id exponent{OpBitFieldUExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U))};
+    const Id mantissa{OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
+    const Id mantissa_f{OpConvertUToF(F32[1], mantissa)};
+    const Id a{OpSelect(F32[1], OpINotEqual(U1[1], mantissa, u32_zero_value),
+                        OpFMul(F32[1], ConstF32(1.f / (1 << (14 + mantissa_bits))), mantissa_f),
+                        f32_zero_value)};
+    const Id b{OpBitcast(F32[1], OpBitwiseOr(U32[1], mantissa, ConstU32(0x7f800000U)))};
+    const Id exponent_c{OpISub(U32[1], exponent, ConstU32(15U))};
+    const Id scale_a{
+        OpFDiv(F32[1], ConstF32(1.f),
+               OpConvertUToF(F32[1], OpShiftLeftLogical(U32[1], u32_one_value,
+                                                        OpSNegate(U32[1], exponent_c))))};
+    const Id scale_b{OpConvertUToF(F32[1], OpShiftLeftLogical(U32[1], u32_one_value, exponent_c))};
+    const Id scale{
+        OpSelect(F32[1], OpSLessThan(U1[1], exponent_c, u32_zero_value), scale_a, scale_b)};
+    const Id c{OpFMul(F32[1], scale,
+                      OpFAdd(F32[1], ConstF32(1.f),
+                             OpFDiv(F32[1], mantissa_f, ConstF32(f32(1 << mantissa_bits)))))};
+    const Id result{OpSelect(F32[1], OpIEqual(U1[1], exponent, u32_zero_value), a,
+                             OpSelect(F32[1], OpIEqual(U1[1], exponent, ConstU32(31U)), b, c))};
     OpReturnValue(result);
     OpFunctionEnd();
     return func;
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 4daba8903..9bb2b7d7a 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -256,6 +256,8 @@ public:
     Id frag_coord{};
     Id front_facing{};
     Id frag_depth{};
+    Id sample_mask{};
+    Id sample_index{};
     Id clip_distances{};
     Id cull_distances{};
 
diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp
index 52b433dbc..795003e43 100644
--- a/src/shader_recompiler/frontend/copy_shader.cpp
+++ b/src/shader_recompiler/frontend/copy_shader.cpp
@@ -49,6 +49,9 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
             const auto& exp = inst.control.exp;
             const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
             for (int i = 0; i < inst.src_count; ++i) {
+                if ((exp.en & (1 << i)) == 0) {
+                    continue;
+                }
                 const auto ofs = offsets[inst.src[i].code];
                 if (ofs != -1) {
                     data.attr_map[ofs] = {semantic, i};
diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp
index 9dccf1105..e1e39105f 100644
--- a/src/shader_recompiler/frontend/translate/export.cpp
+++ b/src/shader_recompiler/frontend/translate/export.cpp
@@ -22,7 +22,7 @@ static AmdGpu::NumberFormat NumberFormatCompressed(
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
         return AmdGpu::NumberFormat::Sint;
     default:
-        UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
+        UNREACHABLE_MSG("Unimplemented compressed export format {}",
                        static_cast<u32>(export_format));
     }
 }
@@ -42,7 +42,7 @@ static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat e
         // All components
         return mask;
     default:
-        UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
+        UNREACHABLE_MSG("Unimplemented uncompressed export format {}",
                        static_cast<u32>(export_format));
     }
 }
@@ -118,25 +118,68 @@ void Translator::ExportRenderTarget(const GcnInst& inst) {
     }
 }
 
+void Translator::ExportDepth(const GcnInst& inst) {
+    const auto& exp = inst.control.exp;
+    if (exp.en == 0) {
+        // No export
+        return;
+    }
+
+    std::array<IR::F32, 4> components{};
+    if (exp.compr) {
+        // Components are float16 packed into a VGPR
+        const auto num_format = NumberFormatCompressed(runtime_info.fs_info.z_export_format);
+        // Export R, G
+        if (exp.en & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
+            components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+        // Export B, A
+        if ((exp.en >> 2) & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
+            components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            // components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+    } else {
+        // Components are float32 into separate VGPRs
+        u32 mask = MaskFromExportFormat(exp.en, runtime_info.fs_info.z_export_format);
+        for (u32 i = 0; i < 4; i++, mask >>= 1) {
+            if ((mask & 1) == 0) {
+                continue;
+            }
+            components[i] = ir.GetVectorReg(IR::VectorReg(inst.src[i].code));
+        }
+    }
+
+    static constexpr std::array MrtzBuiltins = {IR::Attribute::Depth, IR::Attribute::StencilRef,
+                                                IR::Attribute::SampleMask, IR::Attribute::Null};
+
+    for (u32 i = 0; i < 4; ++i) {
+        if (components[i].IsEmpty()) {
+            continue;
+        }
+        ir.SetAttribute(MrtzBuiltins[i], components[i]);
+    }
+}
+
 void Translator::EmitExport(const GcnInst& inst) {
     if (info.stage == Stage::Fragment && inst.control.exp.vm) {
         ir.Discard(ir.LogicalNot(ir.GetExec()));
     }
 
-    const auto& exp = inst.control.exp;
-    const IR::Attribute attrib{exp.target};
+    const IR::Attribute attrib{inst.control.exp.target};
     if (IR::IsMrt(attrib)) {
         return ExportRenderTarget(inst);
     }
-
-    if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
-        LOG_WARNING(Render_Vulkan, "Unsupported depth export");
-        return;
+    if (attrib == IR::Attribute::Depth) {
+        return ExportDepth(inst);
     }
 
-    ASSERT_MSG(!exp.compr, "Compressed exports only supported for render targets");
+    ASSERT_MSG(!inst.control.exp.compr, "Compressed exports only supported for render targets");
 
-    u32 mask = exp.en;
+    u32 mask = inst.control.exp.en;
     for (u32 i = 0; i < 4; i++, mask >>= 1) {
         if ((mask & 1) == 0) {
             continue;
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index e7a7b3be6..9e42ebea9 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -171,6 +171,13 @@ void Translator::EmitPrologue(IR::Block* first_block) {
                 ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
             }
         }
+        if (runtime_info.fs_info.addr_flags.ancillary_ena) {
+            if (runtime_info.fs_info.en_flags.ancillary_ena) {
+                ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::PackedAncillary));
+            } else {
+                ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
+            }
+        }
         break;
     case LogicalStage::TessellationControl: {
         ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
@@ -460,7 +467,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
         result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
     }
     if (operand.output_modifier.clamp) {
-        result = ir.FPSaturate(value);
+        result = ir.FPSaturate(result);
     }
 }
 
@@ -490,7 +497,7 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
             ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier)));
     }
     if (operand.output_modifier.clamp) {
-        value_untyped = ir.FPSaturate(value_raw);
+        value_untyped = ir.FPSaturate(value_untyped);
     }
 }
 
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index dad2cc829..b3b6a3977 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -319,6 +319,7 @@ private:
                            const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
 
     void ExportRenderTarget(const GcnInst& inst);
+    void ExportDepth(const GcnInst& inst);
     void LogMissingOpcode(const GcnInst& inst);
     IR::VectorReg GetScratchVgpr(u32 offset);
 
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 689264c6a..ccf2c45e0 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -210,8 +210,6 @@ struct Info {
     bool has_bitwise_xor{};
     bool has_image_gather{};
     bool has_image_query{};
-    bool has_layer_output{};
-    bool has_viewport_index_output{};
     bool uses_buffer_atomic_float_min_max{};
     bool uses_image_atomic_float_min_max{};
     bool uses_lane_id{};
diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp
index 388f8de8c..382f9b1d9 100644
--- a/src/shader_recompiler/ir/attribute.cpp
+++ b/src/shader_recompiler/ir/attribute.cpp
@@ -160,6 +160,12 @@ std::string NameOf(Attribute attribute) {
         return "TessFactorsBufferBase";
     case Attribute::PointSize:
         return "PointSize";
+    case Attribute::StencilRef:
+        return "StencilRef";
+    case Attribute::SampleMask:
+        return "SampleMask";
+    case Attribute::PackedAncillary:
+        return "PackedAncillary";
     default:
         break;
     }
diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h
index 28950ab52..c8a6e6b20 100644
--- a/src/shader_recompiler/ir/attribute.h
+++ b/src/shader_recompiler/ir/attribute.h
@@ -88,6 +88,9 @@ enum class Attribute : u64 {
    OffChipLdsBase = 91,
     TessFactorsBufferBase = 92,
     PointSize = 93,
+    StencilRef = 94,
+    SampleMask = 95,
+    PackedAncillary = 96,
     Max,
 };
 
diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
index b877a6e87..5f9a3cc55 100644
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@@ -257,12 +257,50 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
         IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
         const IR::F32 value = IR::F32{inst.Arg(0)};
         inst.ReplaceUsesWithAndRemove(
-            ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value))));
+            ir.LogicalNot(ir.LogicalOr(ir.FPIsNan(value), ir.FPIsInf(value))));
     } else {
         UNREACHABLE();
     }
 }
 
+bool FoldPackedAncillary(IR::Block& block, IR::Inst& inst) {
+    if (inst.Arg(0).IsImmediate() || !inst.Arg(1).IsImmediate() || !inst.Arg(2).IsImmediate()) {
+        return false;
+    }
+    IR::Inst* value = inst.Arg(0).InstRecursive();
+    if (value->GetOpcode() != IR::Opcode::GetAttributeU32 ||
+        value->Arg(0).Attribute() != IR::Attribute::PackedAncillary) {
+        return false;
+    }
+    const u32 offset = inst.Arg(1).U32();
+    const u32 bits = inst.Arg(2).U32();
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    if (offset >= 8 && offset + bits <= 12) {
+        const auto sample_index = ir.GetAttributeU32(IR::Attribute::SampleIndex);
+        if (offset == 8 && bits == 4) {
+            inst.ReplaceUsesWithAndRemove(sample_index);
+        } else {
+            inst.ReplaceUsesWithAndRemove(
+                ir.BitFieldExtract(sample_index, ir.Imm32(offset - 8), ir.Imm32(bits)));
+        }
+    } else if (offset >= 16 && offset + bits <= 27) {
+        const auto mrt_index = ir.GetAttributeU32(IR::Attribute::RenderTargetIndex);
+        if (offset == 16 && bits == 11) {
+            inst.ReplaceUsesWithAndRemove(mrt_index);
+        } else {
+            inst.ReplaceUsesWithAndRemove(
+                ir.BitFieldExtract(mrt_index, ir.Imm32(offset - 16), ir.Imm32(bits)));
+        }
+    } else {
+        UNREACHABLE_MSG("Unhandled bitfield extract from ancillary VGPR offset={}, bits={}", offset,
+                        bits);
+    }
+
+    value->ReplaceUsesWithAndRemove(ir.Imm32(0U));
+
+    return true;
+}
+
 void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::IAdd32:
@@ -475,6 +513,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         FoldWhenAllImmediates(inst, [](u64 a) { return static_cast<u32>(std::popcount(a)); });
         return;
     case IR::Opcode::BitFieldUExtract:
+        if (FoldPackedAncillary(block, inst)) {
+            return;
+        }
         FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
             if (static_cast<u64>(shift) + static_cast<u64>(count) > 32) {
                 UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 625c8676e..38aad55c4 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -934,14 +934,25 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
         }
     }();
 
-    const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
-    // Query dimensions of image if needed for normalization.
-    // We can't use the image sharp because it could be bound to a different image later.
+    const bool is_msaa = view_type == AmdGpu::ImageType::Color2DMsaa ||
+                         view_type == AmdGpu::ImageType::Color2DMsaaArray;
+    const bool unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
+    const bool needs_dimensions = (!is_msaa && unnormalized) || (is_msaa && !unnormalized);
     const auto dimensions =
-        unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info)
-                     : IR::Value{};
+        needs_dimensions ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info)
+                         : IR::Value{};
     const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
         const auto coord = get_addr_reg(coord_idx);
+        if (is_msaa) {
+            // For MSAA images preserve the unnormalized coord or manually unnormalize it
+            if (unnormalized) {
+                return ir.ConvertFToU(32, coord);
+            } else {
+                const auto dim =
+                    ir.ConvertUToF(32, 32, IR::U32{ir.CompositeExtract(dimensions, dim_idx)});
+                return ir.ConvertFToU(32, ir.FPMul(coord, dim));
+            }
+        }
         if (unnormalized) {
             // Normalize the coordinate for sampling, dividing by its corresponding dimension.
             const auto dim =
@@ -958,12 +969,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
         addr_reg = addr_reg + 1;
         return get_coord(addr_reg - 1, 0);
     case AmdGpu::ImageType::Color1DArray: // x, slice
-        [[fallthrough]];
-    case AmdGpu::ImageType::Color2D: // x, y
+    case AmdGpu::ImageType::Color2D:     // x, y
+    case AmdGpu::ImageType::Color2DMsaa: // x, y
         addr_reg = addr_reg + 2;
         return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
-    case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
-        [[fallthrough]];
     case AmdGpu::ImageType::Color2DArray: // x, y, slice
         addr_reg = addr_reg + 3;
         // Note we can use FixCubeCoords with fallthrough cases since it checks for image type.
@@ -986,6 +995,9 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
     const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
 
     auto texel = [&] -> IR::Value {
+        if (is_msaa) {
+            return ir.ImageRead(handle, coords, ir.Imm32(0U), ir.Imm32(0U), inst_info);
+        }
         if (inst_info.is_gather) {
             if (inst_info.is_depth) {
                 return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index a7108a5ef..8f0e61da2 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -160,13 +160,6 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
         }
     }
 
-    if (info.stores.GetAny(IR::Attribute::RenderTargetIndex)) {
-        info.has_layer_output = true;
-    }
-    if (info.stores.GetAny(IR::Attribute::ViewportIndex)) {
-        info.has_viewport_index_output = true;
-    }
-
     // In case Flatbuf has not already been bound by IR and is needed
     // to query buffer sizes, bind it now.
     if (!profile.supports_robust_buffer_access && !info.uses_dma) {
diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h
index 10728d8dd..84a4a51d5 100644
--- a/src/shader_recompiler/ir/reinterpret.h
+++ b/src/shader_recompiler/ir/reinterpret.h
@@ -22,7 +22,7 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
 }
 
 /// Converts gamma corrected value to linear space
-inline F32 ApplyGammaToLinear(IREmitter& ir, F32& c) {
+inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) {
     const F32 a = ir.FPPow(ir.FPMul(ir.FPAdd(c, ir.Imm32(0.055f)), ir.Imm32(1.0f / 1.055f)),
                            ir.Imm32(2.4f));
     const F32 b = ir.FPMul(c, ir.Imm32(1.0f / 12.92f));
@@ -80,6 +80,9 @@ inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
         const auto float_val = ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
         return ir.FPDiv(float_val, ir.Imm32(static_cast<f32>(std::numeric_limits<u32>::max())));
     }
+    case AmdGpu::NumberConversion::SrgbToNorm: {
+        return ApplyGammaToLinear(ir, value);
+    }
     default:
         UNREACHABLE();
     }
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index f532dcbad..53e4ecd11 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -205,12 +205,13 @@ struct FragmentRuntimeInfo {
     u32 num_inputs;
     std::array inputs;
     std::array color_buffers;
+    AmdGpu::Liverpool::ShaderExportFormat z_export_format;
     bool dual_source_blending;
 
     bool operator==(const FragmentRuntimeInfo& other) const noexcept {
         return std::ranges::equal(color_buffers, other.color_buffers) &&
                en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
-               num_inputs == other.num_inputs &&
+               num_inputs == other.num_inputs && z_export_format == other.z_export_format &&
               dual_source_blending == other.dual_source_blending &&
               std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs,
                                  other.inputs.begin(), other.inputs.begin() + num_inputs);
diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h
index 27c9313a2..21c2eee2a 100644
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -102,6 +102,7 @@ enum class NumberConversion : u32 {
     Sint8ToSnormNz = 4,
     Sint16ToSnormNz = 5,
     Uint32ToUnorm = 6,
+    SrgbToNorm = 7,
 };
 
 union CompMapping {
@@ -219,6 +220,8 @@ constexpr NumberFormat RemapNumberFormat(const NumberFormat format, const DataFo
             return format;
         }
     }
+    case NumberFormat::Srgb:
+        return data_format == DataFormat::FormatBc6 ? NumberFormat::Unorm : format;
     case NumberFormat::Uscaled:
         return NumberFormat::Uint;
     case NumberFormat::Sscaled:
@@ -295,6 +298,9 @@ constexpr NumberConversion MapNumberConversion(const NumberFormat num_fmt,
             return NumberConversion::None;
         }
     }
+    case NumberFormat::Srgb:
+        return data_fmt == DataFormat::FormatBc6 ? NumberConversion::SrgbToNorm
+                                                 : NumberConversion::None;
     case NumberFormat::Uscaled:
         return NumberConversion::UintToUscaled;
     case NumberFormat::Sscaled:
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index d13aeec99..5206edbec 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -254,6 +254,15 @@ bool Instance::CreateDevice() {
 
     // Optional
     maintenance_8 = add_extension(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
+    attachment_feedback_loop = add_extension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME);
+    if (attachment_feedback_loop) {
+        attachment_feedback_loop =
+            add_extension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_DYNAMIC_STATE_EXTENSION_NAME);
+        if (!attachment_feedback_loop) {
+            // We want both extensions so remove the first if the second isn't available
+            enabled_extensions.pop_back();
+        }
+    }
     depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
     dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
     if (dynamic_state_3) {
@@ -464,6 +473,12 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceMaintenance8FeaturesKHR{
             .maintenance8 = true,
         },
+        vk::PhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT{
+            .attachmentFeedbackLoopLayout = true,
+        },
+        vk::PhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT{
+            .attachmentFeedbackLoopDynamicState = true,
+        },
         vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
             .shaderBufferFloat32AtomicMinMax =
                 shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
@@ -535,6 +550,10 @@ bool Instance::CreateDevice() {
     if (!maintenance_8) {
         device_chain.unlink<vk::PhysicalDeviceMaintenance8FeaturesKHR>();
     }
+    if (!attachment_feedback_loop) {
+        device_chain.unlink<vk::PhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT>();
+        device_chain.unlink<vk::PhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT>();
+    }
     if (!shader_atomic_float2) {
         device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
     }
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index e1fa180fb..09f68d764 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -114,6 +114,11 @@ public:
         return maintenance_8;
     }
 
+    /// Returns true if VK_EXT_attachment_feedback_loop_layout is supported
+    bool IsAttachmentFeedbackLoopLayoutSupported() const {
+        return attachment_feedback_loop;
+    }
+
     /// Returns true when VK_EXT_custom_border_color is supported
     bool IsCustomBorderColorSupported() const {
         return custom_border_color;
@@ -475,6 +480,7 @@ private:
     bool workgroup_memory_explicit_layout{};
     bool portability_subset{};
     bool maintenance_8{};
+    bool attachment_feedback_loop{};
     bool supports_memory_budget{};
     u64 total_memory_budget{};
     std::vector valid_heaps;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 89b48f0e4..c250f4d13 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,8 +167,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
         BuildCommon(regs.ps_program);
         info.fs_info.en_flags = regs.ps_input_ena;
         info.fs_info.addr_flags = regs.ps_input_addr;
-        const auto& ps_inputs = regs.ps_inputs;
         info.fs_info.num_inputs = regs.num_interp;
+        info.fs_info.z_export_format = regs.z_export_format;
         const auto& cb0_blend = regs.blend_control[0];
         if (cb0_blend.enable) {
             info.fs_info.dual_source_blending =
@@ -182,6 +182,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
         } else {
             info.fs_info.dual_source_blending = false;
         }
+        const auto& ps_inputs = regs.ps_inputs;
         for (u32 i = 0; i < regs.num_interp; i++) {
             info.fs_info.inputs[i] = {
                 .param_index = u8(ps_inputs[i].input_offset.Value()),
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index b7cb570f4..3ff78f967 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -251,15 +251,14 @@ void Rasterizer::EliminateFastClear() {
     if (!col_buf || !col_buf.info.fast_clear) {
         return;
     }
+    VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]);
+    const auto& image_view = texture_cache.FindRenderTarget(desc);
     if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) {
         return;
     }
     for (u32 slice = col_buf.view.slice_start; slice <= col_buf.view.slice_max; ++slice) {
         texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false);
     }
-    const auto& hint = liverpool->last_cb_extent[0];
-    VideoCore::TextureCache::RenderTargetDesc desc(col_buf, hint);
-    const auto& image_view = texture_cache.FindRenderTarget(desc);
     auto& image = texture_cache.GetImage(image_view.image_id);
     const vk::ImageSubresourceRange range = {
         .aspectMask = vk::ImageAspectFlagBits::eColor,
@@ -723,11 +722,6 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
             // to force general layout on it.
            image->binding.force_general |= image_desc.is_written;
         }
-        if (image->binding.is_target) {
-            // The image is already bound as target. Since we read and output to it need to force
-            // general layout too.
-            image->binding.force_general = 1u;
-        }
         image->binding.is_bound = 1u;
     }
 
@@ -754,8 +748,15 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
         auto& image = texture_cache.GetImage(image_id);
         auto& image_view = texture_cache.FindTexture(image_id, desc);
 
-        if (image.binding.force_general || image.binding.is_target) {
-            image.Transit(vk::ImageLayout::eGeneral,
+        // The image is either bound as storage in a separate descriptor or bound as render
+        // target in feedback loop. Depth images are excluded because they can't be bound as
+        // storage and feedback loop doesn't make sense for them
+        if ((image.binding.force_general || image.binding.is_target) &&
+            !image.info.props.is_depth) {
+            image.Transit(instance.IsAttachmentFeedbackLoopLayoutSupported() &&
+                                  image.binding.is_target
+                              ? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
+                              : vk::ImageLayout::eGeneral,
                           vk::AccessFlagBits2::eShaderRead |
                               (image.info.props.is_depth
                                    ? vk::AccessFlagBits2::eDepthStencilAttachmentWrite
@@ -816,6 +817,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
 
 void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
     int cb_index = 0;
+    attachment_feedback_loop = false;
     for (auto attach_idx = 0u; attach_idx < state.num_color_attachments; ++attach_idx) {
         if (state.color_attachments[attach_idx].imageView == VK_NULL_HANDLE) {
             continue;
@@ -835,11 +837,14 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s
             state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u));
         }
         auto& image = texture_cache.GetImage(image_id);
-        if (image.binding.force_general) {
-            image.Transit(
-                vk::ImageLayout::eGeneral,
-                vk::AccessFlagBits2::eColorAttachmentWrite | vk::AccessFlagBits2::eShaderRead, {});
-
+        if (image.binding.is_bound) {
+            ASSERT_MSG(!image.binding.force_general,
+                       "Having image both as storage and render target is unsupported");
+            image.Transit(instance.IsAttachmentFeedbackLoopLayoutSupported()
+                              ? vk::ImageLayout::eAttachmentFeedbackLoopOptimalEXT
+                              : vk::ImageLayout::eGeneral,
+                          vk::AccessFlagBits2::eColorAttachmentWrite, {});
+            attachment_feedback_loop = true;
         } else {
             image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
                           vk::AccessFlagBits2::eColorAttachmentWrite |
@@ -859,23 +864,15 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s
         if (has_stencil) {
             image.aspect_mask |= vk::ImageAspectFlagBits::eStencil;
         }
-        if (image.binding.force_general) {
-            image.Transit(vk::ImageLayout::eGeneral,
-                          vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
-                              vk::AccessFlagBits2::eShaderRead,
-                          {});
-        } else {
-            const auto new_layout = desc.view_info.is_storage
-                                        ? has_stencil
-                                              ? vk::ImageLayout::eDepthStencilAttachmentOptimal
-                                              : vk::ImageLayout::eDepthAttachmentOptimal
-                                        : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
-                                                      : vk::ImageLayout::eDepthReadOnlyOptimal;
-            image.Transit(new_layout,
-                          vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
-                              vk::AccessFlagBits2::eDepthStencilAttachmentRead,
-                          desc.view_info.range);
-        }
+        const auto new_layout = desc.view_info.is_storage
+                                    ? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal
+                                                  : vk::ImageLayout::eDepthAttachmentOptimal
+                                : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
+                                              : vk::ImageLayout::eDepthReadOnlyOptimal;
+        image.Transit(new_layout,
+                      vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
+                          vk::AccessFlagBits2::eDepthStencilAttachmentRead,
+                      desc.view_info.range);
         state.depth_attachment.imageLayout = image.last_state.layout;
         state.stencil_attachment.imageLayout = image.last_state.layout;
         image.usage.depth_target = true;
@@ -1101,6 +1098,7 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline, const bool
     auto& dynamic_state = scheduler.GetDynamicState();
     dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
     dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
+    dynamic_state.SetAttachmentFeedbackLoopEnabled(attachment_feedback_loop);
 
     // Commit new dynamic state to the command buffer.
     dynamic_state.Commit(instance, scheduler.CommandBuffer());
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index a6848d527..b32cfa424 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -143,7 +143,8 @@ private:
     boost::container::static_vector buffer_bindings;
     using ImageBindingInfo = std::pair;
     boost::container::static_vector image_bindings;
-    bool fault_process_pending{false};
+    bool fault_process_pending{};
+    bool attachment_feedback_loop{};
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index a34bb15ad..f1e5937fe 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -333,6 +333,12 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
         dirty_state.line_width = false;
         cmdbuf.setLineWidth(line_width);
     }
+    if (dirty_state.feedback_loop_enabled && instance.IsAttachmentFeedbackLoopLayoutSupported()) {
+        dirty_state.feedback_loop_enabled = false;
+        cmdbuf.setAttachmentFeedbackLoopEnableEXT(feedback_loop_enabled
+                                                      ? vk::ImageAspectFlagBits::eColor
+                                                      : vk::ImageAspectFlagBits::eNone);
+    }
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 7dbc2b260..ef0f84822 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -113,6 +113,7 @@ struct DynamicState {
         bool blend_constants : 1;
         bool color_write_masks : 1;
         bool line_width : 1;
+        bool feedback_loop_enabled : 1;
     } dirty_state{};
 
     Viewports viewports{};
@@ -149,6 +150,7 @@ struct DynamicState {
     std::array blend_constants{};
     ColorWriteMasks color_write_masks{};
     float line_width{};
+    bool feedback_loop_enabled{};
 
     /// Commits the dynamic state to the provided command buffer.
     void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf);
@@ -324,6 +326,13 @@ struct DynamicState {
             dirty_state.line_width = true;
         }
     }
+
+    void SetAttachmentFeedbackLoopEnabled(const bool enabled) {
+        if (feedback_loop_enabled != enabled) {
+            feedback_loop_enabled = enabled;
+            dirty_state.feedback_loop_enabled = true;
+        }
+    }
 };
 
 class Scheduler {
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 8dda6aa18..a0daab362 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -14,7 +14,8 @@ namespace VideoCore {
 
 using namespace Vulkan;
 
-static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
+static vk::ImageUsageFlags ImageUsageFlags(const Vulkan::Instance* instance,
+                                           const ImageInfo& info) {
     vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
                                 vk::ImageUsageFlagBits::eTransferDst |
                                 vk::ImageUsageFlagBits::eSampled;
@@ -23,13 +24,12 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
         usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
     } else {
         usage |= vk::ImageUsageFlagBits::eColorAttachment;
-
-        // In cases where an image is created as a render/depth target and cleared with compute,
-        // we cannot predict whether it will be used as a storage image. A proper solution would
-        // involve re-creating the resource with a new configuration and copying previous
-        // content into it. However, for now, we will set storage usage for all images (if the
-        // format allows), sacrificing a bit of performance. Note use of ExtendedUsage flag set
-        // by default.
+        if (instance->IsAttachmentFeedbackLoopLayoutSupported()) {
+            usage |= vk::ImageUsageFlagBits::eAttachmentFeedbackLoopEXT;
+        }
+        // Always create images with storage flag to avoid needing re-creation in case of e.g.
+        // compute clears. This sacrifices a bit of performance but is less work. ExtendedUsage
+        // flag is also used.
         usage |= vk::ImageUsageFlagBits::eStorage;
     }
 }
@@ -128,7 +128,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
         flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
     }
 
-    usage_flags = ImageUsageFlags(info);
+    usage_flags = ImageUsageFlags(instance, info);
     format_features = FormatFeatureFlags(usage_flags);
 
     switch (info.pixel_format) {
@@ -348,11 +348,16 @@ void Image::CopyImage(Image& src_image) {
     const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
     ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1);
 
+    const u32 width = src_info.size.width;
+    const u32 height = src_info.size.height;
+    const u32 depth =
+        info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth;
+
     boost::container::small_vector image_copies;
     for (u32 mip = 0; mip < num_mips; ++mip) {
-        const auto mip_w = std::max(src_info.size.width >> mip, 1u);
-        const auto mip_h = std::max(src_info.size.height >> mip, 1u);
-        const auto mip_d = std::max(src_info.size.depth >> mip, 1u);
+        const auto mip_w = std::max(width >> mip, 1u);
+        const auto mip_h = std::max(height >> mip, 1u);
+        const auto mip_d = std::max(depth >> mip, 1u);
 
         image_copies.emplace_back(vk::ImageCopy{
             .srcSubresource{
@@ -365,7 +370,7 @@ void Image::CopyImage(Image& src_image) {
                 .aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
                 .mipLevel = mip,
                 .baseArrayLayer = 0,
-                .layerCount = src_info.resources.layers,
+                .layerCount = info.resources.layers,
             },
             .extent = {mip_w, mip_h, mip_d},
         });
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index 00f56b1c7..583b0d7fa 100644
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -45,8 +45,9 @@ struct ImageInfo {
     bool IsTiled() const {
         return tile_mode != AmdGpu::TileMode::DisplayLinearAligned;
     }
-    Extent3D BlockDim() const {
-        return props.is_block ? Extent3D{size.width >> 2, size.height >> 2, size.depth} : size;
+    Extent2D BlockDim() const {
+        const auto dim = props.is_block ? 2 : 0;
+        return Extent2D{size.width >> dim, size.height >> dim};
     }
 
     s32 MipOf(const ImageInfo& info) const;
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 2da037a6e..f39bc16fd 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -304,6 +304,12 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag
             return {ExpandImage(image_info, cache_image_id), -1, -1};
         }
 
+        if (image_info.guest_size == tex_cache_image.info.guest_size &&
+            (image_info.type == AmdGpu::ImageType::Color3D ||
+             tex_cache_image.info.type == AmdGpu::ImageType::Color3D)) {
+            return {ExpandImage(image_info, cache_image_id), -1, -1};
+        }
+
         // Size and resources are less than or equal, use image view.
         if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
             image_info.guest_size <= tex_cache_image.info.guest_size) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index e20d4dcd0..097fdcb96 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -30,6 +30,10 @@ struct Region2D {
 struct Extent2D {
     u32 width;
     u32 height;
+
+    bool operator==(const Extent2D& other) const {
+        return width == other.width && height == other.height;
+    }
 };
 
 struct Extent3D {
@@ -37,8 +41,6 @@ struct Extent3D {
     u32 height;
     u32 depth;
 
-    auto operator<=>(const Extent3D&) const = default;
-
     bool operator==(const Extent3D& other) const {
         return width == other.width && height == other.height && depth == other.depth;
     }