video_core: Address various UE bugs (#3559)

* vk_rasterizer: Reorder image query in fast clear elimination

Fixes missing clears when a texture is being cleared using this method but never actually used for rendering purposes by ensuring the texture cache has at least a chance to register cmask

* shader_recompiler: Partial support for ANCILLARY_ENA

* pixel_format: Add number conversion of BC6 srgb format

* texture_cache: Support aliases of 3D and 2D array images

Used be UE to render its post processing LUT

* pixel_format: Test BC6 srgb as unorm

Still not sure what is up with snorm/unorm can be useful to have both actions to compare for now

* video_core: Use attachment feedback layout instead of general if possible

UE games often do mipgen passes where the previous mip of the image being rendered to is bound for reading. This appears to cause corruption issues so use attachment feedback loop extension to ensure correct output

* renderer_vulkan: Improve feedback loop code

* Set proper usage flag for feedback loop usage
* Add dynamic state extension and enable it for color aspect when necessary
* Check if image is bound instead of force_general for better code consistency

* shader_recompiler: More proper depth export implementation

* shader_recompiler: Fix bug in output modifiers

* shader_recompiler: Fix sampling from MSAA images

This is not allowed by any graphics API but seems hardware supports it somehow and it can be encountered. To avoid glitched output translate to to a texelFetch call on sample 0

* clang format

* image: Add back missing code

* shader_recompiler: Better ancillary implementation

Now is implemented with a custom attribute that is constant propagated depending on which parts of it are extracted. It will assert if an unknown part is used or if the attribute itself is not removed by dead code elim

* copy_shader: Ignore not enabled export channels

* constant_propagation: Invalidate ancillary after successful elimination

* spirv: Fix f11/f10 conversion to f32

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
TheTurtle
2025-09-12 19:29:16 +03:00
committed by GitHub
parent de7652384d
commit 374c2194d4
30 changed files with 369 additions and 183 deletions

View File

@@ -22,7 +22,7 @@ static AmdGpu::NumberFormat NumberFormatCompressed(
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
return AmdGpu::NumberFormat::Sint;
default:
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
UNREACHABLE_MSG("Unimplemented compressed export format {}",
static_cast<u32>(export_format));
}
}
@@ -42,7 +42,7 @@ static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat e
// All components
return mask;
default:
UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
UNREACHABLE_MSG("Unimplemented uncompressed export format {}",
static_cast<u32>(export_format));
}
}
@@ -118,25 +118,68 @@ void Translator::ExportRenderTarget(const GcnInst& inst) {
}
}
void Translator::ExportDepth(const GcnInst& inst) {
const auto& exp = inst.control.exp;
if (exp.en == 0) {
// No export
return;
}
std::array<IR::F32, 4> components{};
if (exp.compr) {
// Components are float16 packed into a VGPR
const auto num_format = NumberFormatCompressed(runtime_info.fs_info.z_export_format);
// Export R, G
if (exp.en & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
// Export B, A
if ((exp.en >> 2) & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
// components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
} else {
// Components are float32 into separate VGPRS
u32 mask = MaskFromExportFormat(exp.en, runtime_info.fs_info.z_export_format);
for (u32 i = 0; i < 4; i++, mask >>= 1) {
if ((mask & 1) == 0) {
continue;
}
components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
}
}
static constexpr std::array MrtzBuiltins = {IR::Attribute::Depth, IR::Attribute::StencilRef,
IR::Attribute::SampleMask, IR::Attribute::Null};
for (u32 i = 0; i < 4; ++i) {
if (components[i].IsEmpty()) {
continue;
}
ir.SetAttribute(MrtzBuiltins[i], components[i]);
}
}
void Translator::EmitExport(const GcnInst& inst) {
if (info.stage == Stage::Fragment && inst.control.exp.vm) {
ir.Discard(ir.LogicalNot(ir.GetExec()));
}
const auto& exp = inst.control.exp;
const IR::Attribute attrib{exp.target};
const IR::Attribute attrib{inst.control.exp.target};
if (IR::IsMrt(attrib)) {
return ExportRenderTarget(inst);
}
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
return;
if (attrib == IR::Attribute::Depth) {
return ExportDepth(inst);
}
ASSERT_MSG(!exp.compr, "Compressed exports only supported for render targets");
ASSERT_MSG(!inst.control.exp.compr, "Compressed exports only supported for render targets");
u32 mask = exp.en;
u32 mask = inst.control.exp.en;
for (u32 i = 0; i < 4; i++, mask >>= 1) {
if ((mask & 1) == 0) {
continue;