// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/position.h" #include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { static AmdGpu::NumberFormat NumberFormatCompressed( AmdGpu::Liverpool::ShaderExportFormat export_format) { switch (export_format) { case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16: return AmdGpu::NumberFormat::Float; case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16: return AmdGpu::NumberFormat::Unorm; case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16: return AmdGpu::NumberFormat::Snorm; case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16: return AmdGpu::NumberFormat::Uint; case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16: return AmdGpu::NumberFormat::Sint; default: UNREACHABLE_MSG("Unimplemented compressed export format {}", static_cast(export_format)); } } static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) { switch (export_format) { case AmdGpu::Liverpool::ShaderExportFormat::R_32: // Red only return mask & 1; case AmdGpu::Liverpool::ShaderExportFormat::GR_32: // Red and Green only return mask & 3; case AmdGpu::Liverpool::ShaderExportFormat::AR_32: // Red and Alpha only return mask & 9; case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32: // All components return mask; default: UNREACHABLE_MSG("Unimplemented uncompressed export format {}", static_cast(export_format)); } } void Translator::ExportRenderTarget(const GcnInst& inst) { const auto& exp = inst.control.exp; const IR::Attribute mrt{exp.target}; info.mrt_mask |= 1u << static_cast(mrt); // Dual source blending uses MRT1 for exporting src1 u32 color_buffer_idx = static_cast(mrt) - static_cast(IR::Attribute::RenderTarget0); if (runtime_info.fs_info.dual_source_blending && mrt == IR::Attribute::RenderTarget1) { color_buffer_idx = 0; } const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) { // No export return; } std::array components{}; if (exp.compr) { // Components are float16 packed into a VGPR const auto num_format = NumberFormatCompressed(color_buffer.export_format); // Export R, G if (exp.en & 1) { const IR::Value unpacked_value = ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code))); components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)}; components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)}; } // Export B, A if ((exp.en >> 2) & 1) { const IR::Value unpacked_value = ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code))); components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)}; components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)}; } } else { // Components are float32 into separate VGPRS u32 mask = MaskFromExportFormat(exp.en, color_buffer.export_format); for (u32 i = 0; i < 4; i++, mask >>= 1) { if ((mask & 1) == 0) { continue; } components[i] = ir.GetVectorReg(IR::VectorReg(inst.src[i].code)); } } // Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target // need a bias applied to round correctly; detect and set the flag for that here. const auto needs_unorm_fixup = profile.needs_unorm_fixup && (color_buffer.num_format == AmdGpu::NumberFormat::Unorm || color_buffer.num_format == AmdGpu::NumberFormat::Snorm || color_buffer.num_format == AmdGpu::NumberFormat::Srgb) && (color_buffer.data_format == AmdGpu::DataFormat::Format8 || color_buffer.data_format == AmdGpu::DataFormat::Format8_8 || color_buffer.data_format == AmdGpu::DataFormat::Format8_8_8_8); // Swizzle components and export for (u32 i = 0; i < 4; ++i) { const auto swizzled_comp = components[color_buffer.swizzle.Map(i)]; if (swizzled_comp.IsEmpty()) { continue; } auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion); if (needs_unorm_fixup) { // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected. converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f)); } ir.SetAttribute(mrt, converted, i); } } void Translator::ExportDepth(const GcnInst& inst) { const auto& exp = inst.control.exp; if (exp.en == 0) { // No export return; } std::array components{}; if (exp.compr) { // Components are float16 packed into a VGPR const auto num_format = NumberFormatCompressed(runtime_info.fs_info.z_export_format); // Export R, G if (exp.en & 1) { const IR::Value unpacked_value = ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code))); components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)}; components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)}; } // Export B, A if ((exp.en >> 2) & 1) { const IR::Value unpacked_value = ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code))); components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)}; // components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)}; } } else { // Components are float32 into separate VGPRS u32 mask = MaskFromExportFormat(exp.en & runtime_info.fs_info.mrtz_mask, runtime_info.fs_info.z_export_format); for (u32 i = 0; i < 4; i++, mask >>= 1) { if ((mask & 1) == 0) { continue; } components[i] = ir.GetVectorReg(IR::VectorReg(inst.src[i].code)); } } static constexpr std::array MrtzBuiltins = {IR::Attribute::Depth, IR::Attribute::StencilRef, IR::Attribute::SampleMask, IR::Attribute::Null}; for (u32 i = 0; i < 4; ++i) { if (components[i].IsEmpty()) { continue; } ir.SetAttribute(MrtzBuiltins[i], components[i]); } } void Translator::EmitExport(const GcnInst& inst) { if (info.stage == Stage::Fragment && inst.control.exp.vm) { ir.Discard(ir.LogicalNot(ir.GetExec())); } const IR::Attribute attrib{inst.control.exp.target}; if (IR::IsMrt(attrib)) { return ExportRenderTarget(inst); } if (attrib == IR::Attribute::Depth) { return ExportDepth(inst); } ASSERT_MSG(!inst.control.exp.compr, "Compressed exports only supported for render targets"); u32 mask = inst.control.exp.en; for (u32 i = 0; i < 4; i++, mask >>= 1) { if ((mask & 1) == 0) { continue; } const auto value = ir.GetVectorReg(IR::VectorReg(inst.src[i].code)); if (IsPosition(attrib)) { IR::ExportPosition(ir, runtime_info.vs_info, attrib, i, value); } else { ir.SetAttribute(attrib, value, i); } } } } // namespace Shader::Gcn