Files
shadPS4/src/shader_recompiler/frontend/translate/export.cpp
Stephen Miller 0bfde1fcde video_core: Check DB_SHADER_CONTROL register before performing depth exports (#3588)
The DB_SHADER_CONTROL register has several enable flags which must be set before certain depth exports are enabled.
This commit adds logic to respect the values in this register when performing depth exports, which fixes the regression in earlier versions of KNACK.
I've also renamed DepthBufferControl to DepthShaderControl, since that's closer to the official name for the register.
2025-09-13 04:32:24 -07:00

198 lines
7.9 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/position.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn {
// Returns the number format used to unpack the two 16-bit halves of a VGPR for a
// compressed (packed 16-bit) shader export format.
// Any export format without a case below is reported through UNREACHABLE_MSG.
static AmdGpu::NumberFormat NumberFormatCompressed(
    AmdGpu::Liverpool::ShaderExportFormat export_format) {
    using Format = AmdGpu::Liverpool::ShaderExportFormat;
    using Number = AmdGpu::NumberFormat;

    switch (export_format) {
    case Format::ABGR_FP16:
        return Number::Float;
    case Format::ABGR_UNORM16:
        return Number::Unorm;
    case Format::ABGR_SNORM16:
        return Number::Snorm;
    case Format::ABGR_UINT16:
        return Number::Uint;
    case Format::ABGR_SINT16:
        return Number::Sint;
    default:
        UNREACHABLE_MSG("Unimplemented compressed export format {}",
                        static_cast<u32>(export_format));
    }
}
// Restricts a per-component export enable mask to the components that the given
// uncompressed (32-bit per component) export format carries.
// Bit 0 of the mask is the first component (red), bit 3 the fourth (alpha).
// Any export format without a case below is reported through UNREACHABLE_MSG.
static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) {
    using Format = AmdGpu::Liverpool::ShaderExportFormat;

    u32 carried_components = 0;
    switch (export_format) {
    case Format::R_32:
        // Red only
        carried_components = 0b0001;
        break;
    case Format::GR_32:
        // Red and Green only
        carried_components = 0b0011;
        break;
    case Format::AR_32:
        // Red and Alpha only
        carried_components = 0b1001;
        break;
    case Format::ABGR_32:
        // All components: the incoming 8-bit mask passes through untouched
        carried_components = 0xFF;
        break;
    default:
        UNREACHABLE_MSG("Unimplemented uncompressed export format {}",
                        static_cast<u32>(export_format));
    }
    return mask & carried_components;
}
void Translator::ExportRenderTarget(const GcnInst& inst) {
const auto& exp = inst.control.exp;
const IR::Attribute mrt{exp.target};
info.mrt_mask |= 1u << static_cast<u8>(mrt);
// Dual source blending uses MRT1 for exporting src1
u32 color_buffer_idx = static_cast<u32>(mrt) - static_cast<u32>(IR::Attribute::RenderTarget0);
if (runtime_info.fs_info.dual_source_blending && mrt == IR::Attribute::RenderTarget1) {
color_buffer_idx = 0;
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) {
// No export
return;
}
std::array<IR::F32, 4> components{};
if (exp.compr) {
// Components are float16 packed into a VGPR
const auto num_format = NumberFormatCompressed(color_buffer.export_format);
// Export R, G
if (exp.en & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
// Export B, A
if ((exp.en >> 2) & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
} else {
// Components are float32 into separate VGPRS
u32 mask = MaskFromExportFormat(exp.en, color_buffer.export_format);
for (u32 i = 0; i < 4; i++, mask >>= 1) {
if ((mask & 1) == 0) {
continue;
}
components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
}
}
// Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target
// need a bias applied to round correctly; detect and set the flag for that here.
const auto needs_unorm_fixup = profile.needs_unorm_fixup &&
(color_buffer.num_format == AmdGpu::NumberFormat::Unorm ||
color_buffer.num_format == AmdGpu::NumberFormat::Snorm ||
color_buffer.num_format == AmdGpu::NumberFormat::Srgb) &&
(color_buffer.data_format == AmdGpu::DataFormat::Format8 ||
color_buffer.data_format == AmdGpu::DataFormat::Format8_8 ||
color_buffer.data_format == AmdGpu::DataFormat::Format8_8_8_8);
// Swizzle components and export
for (u32 i = 0; i < 4; ++i) {
const auto swizzled_comp = components[color_buffer.swizzle.Map(i)];
if (swizzled_comp.IsEmpty()) {
continue;
}
auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion);
if (needs_unorm_fixup) {
// FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
}
ir.SetAttribute(mrt, converted, i);
}
}
void Translator::ExportDepth(const GcnInst& inst) {
const auto& exp = inst.control.exp;
if (exp.en == 0) {
// No export
return;
}
std::array<IR::F32, 4> components{};
if (exp.compr) {
// Components are float16 packed into a VGPR
const auto num_format = NumberFormatCompressed(runtime_info.fs_info.z_export_format);
// Export R, G
if (exp.en & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
// Export B, A
if ((exp.en >> 2) & 1) {
const IR::Value unpacked_value =
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
// components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
}
} else {
// Components are float32 into separate VGPRS
u32 mask = MaskFromExportFormat(exp.en & runtime_info.fs_info.mrtz_mask,
runtime_info.fs_info.z_export_format);
for (u32 i = 0; i < 4; i++, mask >>= 1) {
if ((mask & 1) == 0) {
continue;
}
components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
}
}
static constexpr std::array MrtzBuiltins = {IR::Attribute::Depth, IR::Attribute::StencilRef,
IR::Attribute::SampleMask, IR::Attribute::Null};
for (u32 i = 0; i < 4; ++i) {
if (components[i].IsEmpty()) {
continue;
}
ir.SetAttribute(MrtzBuiltins[i], components[i]);
}
}
void Translator::EmitExport(const GcnInst& inst) {
if (info.stage == Stage::Fragment && inst.control.exp.vm) {
ir.Discard(ir.LogicalNot(ir.GetExec()));
}
const IR::Attribute attrib{inst.control.exp.target};
if (IR::IsMrt(attrib)) {
return ExportRenderTarget(inst);
}
if (attrib == IR::Attribute::Depth) {
return ExportDepth(inst);
}
ASSERT_MSG(!inst.control.exp.compr, "Compressed exports only supported for render targets");
u32 mask = inst.control.exp.en;
for (u32 i = 0; i < 4; i++, mask >>= 1) {
if ((mask & 1) == 0) {
continue;
}
const auto value = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
if (IsPosition(attrib)) {
IR::ExportPosition(ir, runtime_info.vs_info, attrib, i, value);
} else {
ir.SetAttribute(attrib, value, i);
}
}
}
} // namespace Shader::Gcn