mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-12-12 14:48:52 +00:00)
shader_recompiler: Improve shader exports accuracy (part 1) (#3447)
* video_core: support for RT layer outputs

  - support for RT layer outputs
  - refactor for handling of export attributes
  - move output->attribute mapping to a separate header

* export: Rework render target exports

  - Centralize all code related to MRT exports into a single function to make it easier to follow
  - Apply swizzle to output RGBA colors instead of the render target channel. This fixes swizzles on formats with < 4 channels.
    For example, with render target format R8_UNORM and COMP_SWAP ALT_REV, the previous code would output frag_color.a = color.r; instead of frag_color.r = color.a;, which would result in incorrect output in some cases.

* vk_pipeline_cache: Apply swizzle to write masks

---------

Co-authored-by: polyproxy <47796739+polybiusproxy@users.noreply.github.com>
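To make the swizzle point above concrete, here is a minimal standalone C++ sketch of the corrected behaviour. It is not code from the repository: the Swizzle enum and ApplySwizzle helper are hypothetical stand-ins for AmdGpu::CompSwizzle and the recompiler's export path. The swizzle selects which source component feeds each output channel, so a single-channel target such as R8_UNORM still receives its data in channel 0 (frag_color.r = color.a in the ALT_REV example), instead of the data being redirected to a channel the format does not store.

    #include <array>
    #include <cstdio>

    // Hypothetical swizzle selector, standing in for AmdGpu::CompSwizzle.
    enum class Swizzle { Zero, One, Red, Green, Blue, Alpha };

    // Corrected behaviour: output channel i takes the source component named by swizzle[i].
    static std::array<float, 4> ApplySwizzle(const std::array<float, 4>& color,
                                             const std::array<Swizzle, 4>& swizzle) {
        std::array<float, 4> out{};
        for (int i = 0; i < 4; ++i) {
            switch (swizzle[i]) {
            case Swizzle::Red:
                out[i] = color[0];
                break;
            case Swizzle::Green:
                out[i] = color[1];
                break;
            case Swizzle::Blue:
                out[i] = color[2];
                break;
            case Swizzle::Alpha:
                out[i] = color[3];
                break;
            case Swizzle::One:
                out[i] = 1.0f;
                break;
            case Swizzle::Zero:
                out[i] = 0.0f;
                break;
            }
        }
        return out;
    }

    int main() {
        const std::array<float, 4> color{0.25f, 0.5f, 0.75f, 1.0f}; // source r, g, b, a
        // Assumed full ALT_REV mapping; only "channel 0 reads alpha" is taken from the
        // commit message's R8_UNORM example above.
        const std::array<Swizzle, 4> alt_rev{Swizzle::Alpha, Swizzle::Blue, Swizzle::Green,
                                             Swizzle::Red};
        const auto out = ApplySwizzle(color, alt_rev);
        std::printf("frag_color.r = %.2f\n", out[0]); // 1.00, i.e. frag_color.r = color.a
        return 0;
    }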
@@ -2,134 +2,113 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "shader_recompiler/frontend/translate/translate.h"
+#include "shader_recompiler/ir/position.h"
 #include "shader_recompiler/ir/reinterpret.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Gcn {
 
-u32 SwizzleMrtComponent(const PsColorBuffer& color_buffer, u32 comp) {
-    const auto [r, g, b, a] = color_buffer.swizzle;
-    const std::array swizzle_array = {r, g, b, a};
-    const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
-    constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
-    return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
-}
-
-void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                                const PsColorBuffer& color_buffer) {
-    auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
-    if (color_buffer.needs_unorm_fixup) {
-        // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
-        converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
-    }
-    ir.SetAttribute(attribute, converted, comp);
-}
-
-void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
-    u32 color_buffer_idx =
-        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
-    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
-        color_buffer_idx = 0;
-    }
-    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
-
-    AmdGpu::NumberFormat num_format;
-    switch (color_buffer.export_format) {
-    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
-        // No export
-        return;
+static AmdGpu::NumberFormat NumberFormatCompressed(
+    AmdGpu::Liverpool::ShaderExportFormat export_format) {
+    switch (export_format) {
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
-        num_format = AmdGpu::NumberFormat::Float;
-        break;
+        return AmdGpu::NumberFormat::Float;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
-        num_format = AmdGpu::NumberFormat::Unorm;
-        break;
+        return AmdGpu::NumberFormat::Unorm;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
-        num_format = AmdGpu::NumberFormat::Snorm;
-        break;
+        return AmdGpu::NumberFormat::Snorm;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
-        num_format = AmdGpu::NumberFormat::Uint;
-        break;
+        return AmdGpu::NumberFormat::Uint;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
-        num_format = AmdGpu::NumberFormat::Sint;
-        break;
+        return AmdGpu::NumberFormat::Sint;
     default:
         UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
-                        static_cast<u32>(color_buffer.export_format));
-        break;
+                        static_cast<u32>(export_format));
     }
-
-    const auto unpacked_value = ir.Unpack2x16(num_format, value);
-    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
-    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
-
-    const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
-    const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
-
-    ExportMrtValue(attribute, swizzled_r, r, color_buffer);
-    ExportMrtValue(attribute, swizzled_g, g, color_buffer);
-}
-
-void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
-    u32 color_buffer_idx =
-        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
-    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
-        color_buffer_idx = 0;
-    }
-    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
-    const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
-
-    switch (color_buffer.export_format) {
-    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
-        // No export
-        return;
+}
+
+static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) {
+    switch (export_format) {
     case AmdGpu::Liverpool::ShaderExportFormat::R_32:
         // Red only
-        if (swizzled_comp != 0) {
-            return;
-        }
-        break;
+        return mask & 1;
     case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
         // Red and Green only
-        if (swizzled_comp != 0 && swizzled_comp != 1) {
-            return;
-        }
-        break;
+        return mask & 3;
     case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
         // Red and Alpha only
-        if (swizzled_comp != 0 && swizzled_comp != 3) {
-            return;
-        }
-        break;
+        return mask & 9;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
         // All components
-        break;
+        return mask;
     default:
         UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
-                        static_cast<u32>(color_buffer.export_format));
-        break;
+                        static_cast<u32>(export_format));
     }
-    ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
-}
-
-void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
-    if (IsMrt(attribute)) {
-        ExportMrtCompressed(attribute, idx, value);
-        return;
-    }
-    const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
-    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
-    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
-    ir.SetAttribute(attribute, r, idx * 2);
-    ir.SetAttribute(attribute, g, idx * 2 + 1);
-}
-
-void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
-    if (IsMrt(attribute)) {
-        ExportMrtUncompressed(attribute, comp, value);
-        return;
-    }
-    ir.SetAttribute(attribute, value, comp);
-}
+}
+
+void Translator::ExportRenderTarget(const GcnInst& inst) {
+    const auto& exp = inst.control.exp;
+    const IR::Attribute mrt{exp.target};
+    info.mrt_mask |= 1u << static_cast<u8>(mrt);
+
+    // Dual source blending uses MRT1 for exporting src1
+    u32 color_buffer_idx = static_cast<u32>(mrt) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    if (runtime_info.fs_info.dual_source_blending && mrt == IR::Attribute::RenderTarget1) {
+        color_buffer_idx = 0;
+    }
+
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+    if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) {
+        // No export
+        return;
+    }
+
+    std::array<IR::F32, 4> components{};
+    if (exp.compr) {
+        // Components are float16 packed into a VGPR
+        const auto num_format = NumberFormatCompressed(color_buffer.export_format);
+        // Export R, G
+        if (exp.en & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
+            components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+        // Export B, A
+        if ((exp.en >> 2) & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
+            components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+    } else {
+        // Components are float32 into separate VGPRS
+        u32 mask = MaskFromExportFormat(exp.en, color_buffer.export_format);
+        for (u32 i = 0; i < 4; i++, mask >>= 1) {
+            if ((mask & 1) == 0) {
+                continue;
+            }
+            components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
+        }
+    }
+
+    // Swizzle components and export
+    for (u32 i = 0; i < 4; ++i) {
+        const u32 comp_swizzle = static_cast<u32>(color_buffer.swizzle.array[i]);
+        constexpr u32 min_swizzle = static_cast<u32>(AmdGpu::CompSwizzle::Red);
+        const auto swizzled_comp =
+            components[comp_swizzle >= min_swizzle ? comp_swizzle - min_swizzle : i];
+        if (swizzled_comp.IsEmpty()) {
+            continue;
+        }
+        auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion);
+        if (color_buffer.needs_unorm_fixup) {
+            // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
+            converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
+        }
+        ir.SetAttribute(mrt, converted, i);
+    }
+}
 
 void Translator::EmitExport(const GcnInst& inst) {
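Aside, before the next hunk: the new MaskFromExportFormat helper above simply intersects the instruction's 4-bit channel-enable mask (exp.en, bit i enabling channel i) with the channels the export format can actually hold, which is where the constants 1, 3 and 9 come from. A small standalone sketch of the same arithmetic, with an illustrative helper name that is not from the codebase:

    #include <cstdio>

    // Bit i of a mask enables output channel i: R = 1, G = 2, B = 4, A = 8.
    // AR_32 keeps red and alpha, hence 0b1001 == 9, matching the diff above.
    static unsigned ClampMaskToFormat(unsigned enable_mask, unsigned format_channels) {
        return enable_mask & format_channels;
    }

    int main() {
        const unsigned exp_en = 0b1111; // shader writes all four channels
        std::printf("R_32    -> %u\n", ClampMaskToFormat(exp_en, 0b0001)); // 1 (red only)
        std::printf("GR_32   -> %u\n", ClampMaskToFormat(exp_en, 0b0011)); // 3 (red, green)
        std::printf("AR_32   -> %u\n", ClampMaskToFormat(exp_en, 0b1001)); // 9 (red, alpha)
        std::printf("ABGR_32 -> %u\n", ClampMaskToFormat(exp_en, 0b1111)); // 15 (all)
        return 0;
    }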
@@ -139,40 +118,27 @@ void Translator::EmitExport(const GcnInst& inst) {
 
     const auto& exp = inst.control.exp;
     const IR::Attribute attrib{exp.target};
+    if (IR::IsMrt(attrib)) {
+        return ExportRenderTarget(inst);
+    }
+
+    ASSERT_MSG(!exp.compr, "Compressed exports only supported for render targets");
     if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
         LOG_WARNING(Render_Vulkan, "Unsupported depth export");
         return;
     }
 
-    const std::array vsrc = {
-        IR::VectorReg(inst.src[0].code),
-        IR::VectorReg(inst.src[1].code),
-        IR::VectorReg(inst.src[2].code),
-        IR::VectorReg(inst.src[3].code),
-    };
-
-    // Components are float16 packed into a VGPR
-    if (exp.compr) {
-        // Export R, G
-        if (exp.en & 1) {
-            ExportCompressed(attrib, 0, ir.GetVectorReg<IR::U32>(vsrc[0]));
-        }
-        // Export B, A
-        if ((exp.en >> 2) & 1) {
-            ExportCompressed(attrib, 1, ir.GetVectorReg<IR::U32>(vsrc[1]));
-        }
-    } else {
-        // Components are float32 into separate VGPRS
-        u32 mask = exp.en;
-        for (u32 i = 0; i < 4; i++, mask >>= 1) {
-            if ((mask & 1) == 0) {
-                continue;
-            }
-            ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
-        }
-    }
-    if (IR::IsMrt(attrib)) {
-        info.mrt_mask |= 1u << u8(attrib);
-    }
+    u32 mask = exp.en;
+    for (u32 i = 0; i < 4; i++, mask >>= 1) {
+        if ((mask & 1) == 0) {
+            continue;
+        }
+        const auto value = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
+        if (IsPosition(attrib)) {
+            IR::ExportPosition(ir, runtime_info.vs_info, attrib, i, value);
+        } else {
+            ir.SetAttribute(attrib, value, i);
+        }
+    }
 }
 
@@ -317,13 +317,7 @@ private:
     IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                              const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
 
-    void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                        const PsColorBuffer& color_buffer);
-    void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
-    void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
-    void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
-    void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
-
+    void ExportRenderTarget(const GcnInst& inst);
     void LogMissingOpcode(const GcnInst& inst);
 
     IR::VectorReg GetScratchVgpr(u32 offset);