diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0e58b8f58..7f022d234 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -272,6 +272,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
     if (info.has_image_query) {
         ctx.AddCapability(spv::Capability::ImageQuery);
     }
+    if (info.has_layer_output) {
+        ctx.AddCapability(spv::Capability::ShaderLayer);
+    }
     if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
         (info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
         ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
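Note: the ShaderLayer capability lets pre-rasterization stages write the Layer builtin without a geometry shader, and is only legal when the device exposes the Vulkan 1.2 shaderOutputLayer feature (enabled in the vk_instance.cpp hunk further down). A hedged sketch of the probe, assuming a vk::PhysicalDevice handle named physical_device:

```cpp
// Sketch only: query VkPhysicalDeviceVulkan12Features before emitting ShaderLayer.
vk::PhysicalDeviceVulkan12Features vk12_features{};
vk::PhysicalDeviceFeatures2 features2{};
features2.pNext = &vk12_features;
physical_device.getFeatures2(&features2);
// If this is false, layer exports would need a fallback (e.g. a pass-through geometry shader).
const bool can_export_layer = vk12_features.shaderOutputLayer;
```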
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 330516d07..2d64bafb4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -16,39 +16,6 @@
 namespace Shader::Backend::SPIRV {
 
 namespace {
-Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
-    switch (output) {
-    case VsOutput::ClipDist0:
-    case VsOutput::ClipDist1:
-    case VsOutput::ClipDist2:
-    case VsOutput::ClipDist3:
-    case VsOutput::ClipDist4:
-    case VsOutput::ClipDist5:
-    case VsOutput::ClipDist6:
-    case VsOutput::ClipDist7: {
-        const u32 index = u32(output) - u32(VsOutput::ClipDist0);
-        const Id clip_num{ctx.ConstU32(index)};
-        ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
-        return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
-    }
-    case VsOutput::CullDist0:
-    case VsOutput::CullDist1:
-    case VsOutput::CullDist2:
-    case VsOutput::CullDist3:
-    case VsOutput::CullDist4:
-    case VsOutput::CullDist5:
-    case VsOutput::CullDist6:
-    case VsOutput::CullDist7: {
-        const u32 index = u32(output) - u32(VsOutput::CullDist0);
-        const Id cull_num{ctx.ConstU32(index)};
-        ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
-        return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
-    }
-    default:
-        UNREACHABLE_MSG("Vertex output {}", u32(output));
-    }
-}
-
 Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     if (IR::IsParam(attr)) {
         const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
@@ -76,15 +43,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
         }
     }
     switch (attr) {
-    case IR::Attribute::Position0: {
+    case IR::Attribute::Position0:
         return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
-    }
-    case IR::Attribute::Position1:
-    case IR::Attribute::Position2:
-    case IR::Attribute::Position3: {
-        const u32 index = u32(attr) - u32(IR::Attribute::Position1);
-        return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
-    }
+    case IR::Attribute::ClipDistance:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, ctx.ConstU32(element));
+    case IR::Attribute::CullDistance:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, ctx.ConstU32(element));
+    case IR::Attribute::RenderTargetId:
+        return ctx.output_layer;
     case IR::Attribute::Depth:
         return ctx.frag_depth;
     default:
@@ -105,11 +71,13 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
     }
     switch (attr) {
     case IR::Attribute::Position0:
-    case IR::Attribute::Position1:
-    case IR::Attribute::Position2:
-    case IR::Attribute::Position3:
+    case IR::Attribute::ClipDistance:
+    case IR::Attribute::CullDistance:
     case IR::Attribute::Depth:
         return {ctx.F32[1], false};
+    case IR::Attribute::RenderTargetId:
+    case IR::Attribute::ViewportId:
+        return {ctx.S32[1], true};
     default:
         UNREACHABLE_MSG("Write attribute {}", attr);
     }
@@ -270,14 +238,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
 }
 
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
-    if (attr == IR::Attribute::Position1) {
-        LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
-        return;
-    }
     const Id pointer{OutputAttrPointer(ctx, attr, element)};
-    const auto component_type{OutputAttrComponentType(ctx, attr)};
-    if (component_type.second) {
-        ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
+    const auto [component_type, is_integer]{OutputAttrComponentType(ctx, attr)};
+    if (is_integer) {
+        ctx.OpStore(pointer, ctx.OpBitcast(component_type, value));
     } else {
         ctx.OpStore(pointer, value);
     }
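With Position1-3 gone from the backend, OutputAttrPointer resolves each builtin directly, and EmitSetAttribute handles the one integer case. A sketch of the resulting store path for a layer write, reusing the names from the hunks above (the surrounding emitter state is assumed):

```cpp
// RenderTargetId resolves straight to the Layer output variable...
const Id pointer = OutputAttrPointer(ctx, IR::Attribute::RenderTargetId, 0); // == ctx.output_layer
// ...and is typed {S32[1], is_integer == true}, so the raw 32-bit IR value is
// reinterpreted rather than stored as a float:
const auto [component_type, is_integer] =
    OutputAttrComponentType(ctx, IR::Attribute::RenderTargetId);
if (is_integer) {
    ctx.OpStore(pointer, ctx.OpBitcast(component_type, value));
}
```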
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 95d269eb4..19469f64f 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -539,24 +539,26 @@ void EmitContext::DefineInputs() {
     }
 }
 
+void EmitContext::DefineVertexBlock() {
+    output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
+    if (info.stores.GetAny(IR::Attribute::ClipDistance)) {
+        clip_distances = DefineVariable(TypeArray(F32[1], ConstU32(8U)), spv::BuiltIn::ClipDistance,
+                                        spv::StorageClass::Output);
+    }
+    if (info.stores.GetAny(IR::Attribute::CullDistance)) {
+        cull_distances = DefineVariable(TypeArray(F32[1], ConstU32(8U)), spv::BuiltIn::CullDistance,
+                                        spv::StorageClass::Output);
+    }
+    if (info.stores.GetAny(IR::Attribute::RenderTargetId)) {
+        output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
+    }
+}
+
 void EmitContext::DefineOutputs() {
     switch (l_stage) {
     case LogicalStage::Vertex: {
-        // No point in defining builtin outputs (i.e. position) unless next stage is fragment?
-        // Might cause problems linking with tcs
-
-        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
-        const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
-                                          info.stores.Get(IR::Attribute::Position2) ||
-                                          info.stores.Get(IR::Attribute::Position3);
-        if (has_extra_pos_stores) {
-            const Id type{TypeArray(F32[1], ConstU32(8U))};
-            clip_distances =
-                DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
-            cull_distances =
-                DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
-        }
-        if (stage == Stage::Local) {
+        DefineVertexBlock();
+        if (stage == Shader::Stage::Local) {
             const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
             if (num_attrs > 0) {
                 const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
@@ -615,17 +617,7 @@ void EmitContext::DefineOutputs() {
         break;
     }
     case LogicalStage::TessellationEval: {
-        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
-        const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
-                                          info.stores.Get(IR::Attribute::Position2) ||
-                                          info.stores.Get(IR::Attribute::Position3);
-        if (has_extra_pos_stores) {
-            const Id type{TypeArray(F32[1], ConstU32(8U))};
-            clip_distances =
-                DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
-            cull_distances =
-                DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
-        }
+        DefineVertexBlock();
         for (u32 i = 0; i < IR::NumParams; i++) {
             const IR::Attribute param{IR::Attribute::Param0 + i};
             if (!info.stores.GetAny(param)) {
@@ -665,8 +657,7 @@ void EmitContext::DefineOutputs() {
         break;
     }
     case LogicalStage::Geometry: {
-        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
-
+        DefineVertexBlock();
         for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
             const Id id{DefineOutput(F32[4], attr_id)};
             Name(id, fmt::format("out_attr{}", attr_id));
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index b73b2b67d..79ad2b6a1 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -245,6 +245,7 @@ public:
     boost::container::small_vector<Id, 32> interfaces;
 
     Id output_position{};
+    Id output_layer{};
     Id primitive_id{};
     Id vertex_index{};
     Id instance_id{};
@@ -388,6 +389,7 @@ private:
     void DefineArithmeticTypes();
     void DefineInterfaces();
    void DefineInputs();
+    void DefineVertexBlock();
     void DefineOutputs();
     void DefinePushDataBlock();
     void DefineBuffers();
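DefineVertexBlock is now shared by the Vertex, TessellationEval, and Geometry cases, and each builtin is keyed on the attribute stores collected from the IR, instead of the old heuristic where any POS1-POS3 export defined both distance arrays. A toy stand-in of the gating (simplified types, not the project's):

```cpp
// Toy sketch: which builtins get defined for a given set of attribute stores.
struct Defined {
    bool position = true; // BuiltIn Position, unconditional
    bool clip = false;    // BuiltIn ClipDistance, float[8]
    bool cull = false;    // BuiltIn CullDistance, float[8]
    bool layer = false;   // BuiltIn Layer, int
};
template <typename Stores>
Defined GateVertexBlock(const Stores& stores) {
    return Defined{
        .position = true,
        .clip = stores.GetAny(IR::Attribute::ClipDistance),
        .cull = stores.GetAny(IR::Attribute::CullDistance),
        .layer = stores.GetAny(IR::Attribute::RenderTargetId),
    };
}
```

A shader writing only clip distances no longer drags in an unused CullDistance array, and vice versa.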
diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp
index 0047c791d..20d2c6587 100644
--- a/src/shader_recompiler/frontend/translate/export.cpp
+++ b/src/shader_recompiler/frontend/translate/export.cpp
@@ -2,134 +2,113 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "shader_recompiler/frontend/translate/translate.h"
+#include "shader_recompiler/ir/position.h"
 #include "shader_recompiler/ir/reinterpret.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Gcn {
 
-u32 SwizzleMrtComponent(const PsColorBuffer& color_buffer, u32 comp) {
-    const auto [r, g, b, a] = color_buffer.swizzle;
-    const std::array swizzle_array = {r, g, b, a};
-    const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
-    constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
-    return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
-}
-
-void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                                const PsColorBuffer& color_buffer) {
-    auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
-    if (color_buffer.needs_unorm_fixup) {
-        // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
-        converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
-    }
-    ir.SetAttribute(attribute, converted, comp);
-}
-
-void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
-    u32 color_buffer_idx =
-        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
-    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
-        color_buffer_idx = 0;
-    }
-    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
-
-    AmdGpu::NumberFormat num_format;
-    switch (color_buffer.export_format) {
-    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
-        // No export
-        return;
+static AmdGpu::NumberFormat NumberFormatCompressed(
+    AmdGpu::Liverpool::ShaderExportFormat export_format) {
+    switch (export_format) {
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
-        num_format = AmdGpu::NumberFormat::Float;
-        break;
+        return AmdGpu::NumberFormat::Float;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
-        num_format = AmdGpu::NumberFormat::Unorm;
-        break;
+        return AmdGpu::NumberFormat::Unorm;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
-        num_format = AmdGpu::NumberFormat::Snorm;
-        break;
+        return AmdGpu::NumberFormat::Snorm;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
-        num_format = AmdGpu::NumberFormat::Uint;
-        break;
+        return AmdGpu::NumberFormat::Uint;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
-        num_format = AmdGpu::NumberFormat::Sint;
-        break;
+        return AmdGpu::NumberFormat::Sint;
     default:
         UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
-                        static_cast<u32>(color_buffer.export_format));
-        break;
+                        static_cast<u32>(export_format));
     }
-
-    const auto unpacked_value = ir.Unpack2x16(num_format, value);
-    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
-    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
-
-    const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
-    const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
-
-    ExportMrtValue(attribute, swizzled_r, r, color_buffer);
-    ExportMrtValue(attribute, swizzled_g, g, color_buffer);
 }
 
-void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
-    u32 color_buffer_idx =
-        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
-    if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
-        color_buffer_idx = 0;
-    }
-    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
-    const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
-
-    switch (color_buffer.export_format) {
-    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
-        // No export
-        return;
+static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) {
+    switch (export_format) {
     case AmdGpu::Liverpool::ShaderExportFormat::R_32:
         // Red only
-        if (swizzled_comp != 0) {
-            return;
-        }
-        break;
+        return mask & 1;
     case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
         // Red and Green only
-        if (swizzled_comp != 0 && swizzled_comp != 1) {
-            return;
-        }
-        break;
+        return mask & 3;
     case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
         // Red and Alpha only
-        if (swizzled_comp != 0 && swizzled_comp != 3) {
-            return;
-        }
-        break;
+        return mask & 9;
     case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
         // All components
-        break;
+        return mask;
     default:
         UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
-                        static_cast<u32>(color_buffer.export_format));
-        break;
+                        static_cast<u32>(export_format));
     }
-    ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
 }
 
-void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
-    if (IsMrt(attribute)) {
-        ExportMrtCompressed(attribute, idx, value);
-        return;
-    }
-    const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
-    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
-    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
-    ir.SetAttribute(attribute, r, idx * 2);
-    ir.SetAttribute(attribute, g, idx * 2 + 1);
-}
+void Translator::ExportRenderTarget(const GcnInst& inst) {
+    const auto& exp = inst.control.exp;
+    const IR::Attribute mrt{exp.target};
+    info.mrt_mask |= 1u << static_cast<u32>(mrt);
 
-void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
-    if (IsMrt(attribute)) {
-        ExportMrtUncompressed(attribute, comp, value);
+    // Dual source blending uses MRT1 for exporting src1
+    u32 color_buffer_idx = static_cast<u32>(mrt) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    if (runtime_info.fs_info.dual_source_blending && mrt == IR::Attribute::RenderTarget1) {
+        color_buffer_idx = 0;
+    }
+
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+    if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) {
+        // No export
         return;
     }
-    ir.SetAttribute(attribute, value, comp);
+
+    std::array<IR::F32, 4> components{};
+    if (exp.compr) {
+        // Components are float16 packed into a VGPR
+        const auto num_format = NumberFormatCompressed(color_buffer.export_format);
+        // Export R, G
+        if (exp.en & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
+            components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+        // Export B, A
+        if ((exp.en >> 2) & 1) {
+            const IR::Value unpacked_value =
+                ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
+            components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+            components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+        }
+    } else {
+        // Components are float32 into separate VGPRS
+        u32 mask = MaskFromExportFormat(exp.en, color_buffer.export_format);
+        for (u32 i = 0; i < 4; i++, mask >>= 1) {
+            if ((mask & 1) == 0) {
+                continue;
+            }
+            components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
+        }
+    }
+
+    // Swizzle components and export
+    for (u32 i = 0; i < 4; ++i) {
+        const u32 comp_swizzle = static_cast<u32>(color_buffer.swizzle.array[i]);
+        constexpr u32 min_swizzle = static_cast<u32>(AmdGpu::CompSwizzle::Red);
+        const auto swizzled_comp =
+            components[comp_swizzle >= min_swizzle ? comp_swizzle - min_swizzle : i];
+        if (swizzled_comp.IsEmpty()) {
+            continue;
+        }
+        auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion);
+        if (color_buffer.needs_unorm_fixup) {
+            // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
+            converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
+        }
+        ir.SetAttribute(mrt, converted, i);
+    }
 }
 
 void Translator::EmitExport(const GcnInst& inst) {
@@ -139,40 +118,27 @@ void Translator::EmitExport(const GcnInst& inst) {
 
     const auto& exp = inst.control.exp;
     const IR::Attribute attrib{exp.target};
+    if (IR::IsMrt(attrib)) {
+        return ExportRenderTarget(inst);
+    }
+
+    ASSERT_MSG(!exp.compr, "Compressed exports only supported for render targets");
     if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
         LOG_WARNING(Render_Vulkan, "Unsupported depth export");
         return;
     }
 
-    const std::array<IR::VectorReg, 4> vsrc = {
-        IR::VectorReg(inst.src[0].code),
-        IR::VectorReg(inst.src[1].code),
-        IR::VectorReg(inst.src[2].code),
-        IR::VectorReg(inst.src[3].code),
-    };
-
-    // Components are float16 packed into a VGPR
-    if (exp.compr) {
-        // Export R, G
-        if (exp.en & 1) {
-            ExportCompressed(attrib, 0, ir.GetVectorReg(vsrc[0]));
+    u32 mask = exp.en;
+    for (u32 i = 0; i < 4; i++, mask >>= 1) {
+        if ((mask & 1) == 0) {
+            continue;
        }
-        // Export B, A
-        if ((exp.en >> 2) & 1) {
-            ExportCompressed(attrib, 1, ir.GetVectorReg(vsrc[1]));
+        const auto value = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
+        if (IsPosition(attrib)) {
+            IR::ExportPosition(ir, runtime_info.vs_info, attrib, i, value);
+        } else {
+            ir.SetAttribute(attrib, value, i);
         }
-    } else {
-        // Components are float32 into separate VGPRS
-        u32 mask = exp.en;
-        for (u32 i = 0; i < 4; i++, mask >>= 1) {
-            if ((mask & 1) == 0) {
-                continue;
-            }
-            ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
-        }
-    }
-    if (IR::IsMrt(attrib)) {
-        info.mrt_mask |= 1u << u8(attrib);
     }
 }
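ExportRenderTarget folds the old five helpers into one pass over the instruction: decode the enable mask, gather components, then swizzle/convert/store. The mask semantics it relies on, spelled out (grounded in the code above; the values are illustrative):

```cpp
// exp.en is the EXP instruction's 4-bit component enable mask.
// Uncompressed (exp.compr == 0): bit i enables VGPR src[i] as component i,
// further clipped by the color-buffer export format:
static_assert((0b1111u & 9u) == 0b1001u); // AR_32 keeps only R and A
// Compressed (exp.compr == 1): bit 0 enables src[0] (packed R|G halves) and
// bit 2 enables src[1] (packed B|A halves), so en == 0b0101 exports all four
// channels from just two VGPRs.
```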
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 585c2f1b4..fbd07c887 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -317,13 +317,7 @@ private:
     IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                              const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
 
-    void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
-                        const PsColorBuffer& color_buffer);
-    void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
-    void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
-    void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
-    void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
-
+    void ExportRenderTarget(const GcnInst& inst);
     void LogMissingOpcode(const GcnInst& inst);
 
     IR::VectorReg GetScratchVgpr(u32 offset);
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index ccf2c45e0..6ce4395f2 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -210,6 +210,7 @@ struct Info {
     bool has_bitwise_xor{};
     bool has_image_gather{};
     bool has_image_query{};
+    bool has_layer_output{};
     bool uses_buffer_atomic_float_min_max{};
     bool uses_image_atomic_float_min_max{};
     bool uses_lane_id{};
diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
index e1e5d762c..382031710 100644
--- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
+++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "shader_recompiler/ir/ir_emitter.h"
 #include "shader_recompiler/ir/opcodes.h"
+#include "shader_recompiler/ir/position.h"
 #include "shader_recompiler/ir/program.h"
 #include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/recompiler.h"
@@ -142,11 +143,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
                 ASSERT(it != info.gs_copy_data.attr_map.cend());
                 const auto& [attr, comp] = it->second;
 
-                inst.ReplaceOpcode(IR::Opcode::SetAttribute);
-                inst.ClearArgs();
-                inst.SetArg(0, IR::Value{attr});
-                inst.SetArg(1, data);
-                inst.SetArg(2, ir.Imm32(comp));
+                inst.Invalidate();
+                if (IsPosition(attr)) {
+                    ExportPosition(ir, runtime_info.gs_info, attr, comp, data);
+                } else {
+                    ir.SetAttribute(attr, data, comp);
+                }
                 break;
             }
             default:
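The GS copy-shader path previously rewrote the ring store in place with ReplaceOpcode; since one ring write may now lower to either a plain SetAttribute or a position-mapped builtin store, the pass invalidates the old instruction and re-emits through the IREmitter. The general shape of that rewrite pattern (operand capture is illustrative; in the hunk above `data` is captured earlier in the pass):

```cpp
// 1. Capture the operands still needed from the matched instruction.
const auto data = inst.Arg(1); // illustrative: the value written to the ring
// 2. Invalidate it so no stale opcode or uses survive.
inst.Invalidate();
// 3. Re-emit the replacement IR at the same insertion point.
if (IsPosition(attr)) {
    ExportPosition(ir, runtime_info.gs_info, attr, comp, data); // clip/cull/layer mapping
} else {
    ir.SetAttribute(attr, data, comp);
}
```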
"shader_recompiler/ir/position.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" @@ -142,11 +143,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim ASSERT(it != info.gs_copy_data.attr_map.cend()); const auto& [attr, comp] = it->second; - inst.ReplaceOpcode(IR::Opcode::SetAttribute); - inst.ClearArgs(); - inst.SetArg(0, IR::Value{attr}); - inst.SetArg(1, data); - inst.SetArg(2, ir.Imm32(comp)); + inst.Invalidate(); + if (IsPosition(attr)) { + ExportPosition(ir, runtime_info.gs_info, attr, comp, data); + } else { + ir.SetAttribute(attr, data, comp); + } break; } default: diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8f0e61da2..397b196f9 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -160,6 +160,10 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) { } } + if (info.stores.GetAny(IR::Attribute::RenderTargetId)) { + info.has_layer_output = true; + } + // In case Flatbuf has not already been bound by IR and is needed // to query buffer sizes, bind it now. if (!profile.supports_robust_buffer_access && !info.uses_dma) { diff --git a/src/shader_recompiler/ir/position.h b/src/shader_recompiler/ir/position.h new file mode 100644 index 000000000..0fdeb0eb8 --- /dev/null +++ b/src/shader_recompiler/ir/position.h @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::IR { + +/// Maps special position export to builtin attribute stores +inline void ExportPosition(IREmitter& ir, const auto& stage, Attribute attribute, u32 comp, + const IR::F32& value) { + if (attribute == Attribute::Position0) { + ir.SetAttribute(attribute, value, comp); + return; + } + const u32 index = u32(attribute) - u32(Attribute::Position1); + const auto output = stage.outputs[index][comp]; + switch (output) { + case Output::ClipDist0: + case Output::ClipDist1: + case Output::ClipDist2: + case Output::ClipDist3: + case Output::ClipDist4: + case Output::ClipDist5: + case Output::ClipDist6: + case Output::ClipDist7: { + const u32 index = u32(output) - u32(Output::ClipDist0); + ir.SetAttribute(IR::Attribute::ClipDistance, value, index); + break; + } + case Output::CullDist0: + case Output::CullDist1: + case Output::CullDist2: + case Output::CullDist3: + case Output::CullDist4: + case Output::CullDist5: + case Output::CullDist6: + case Output::CullDist7: { + const u32 index = u32(output) - u32(Output::CullDist0); + ir.SetAttribute(IR::Attribute::CullDistance, value, index); + break; + } + case Output::GsMrtIndex: + ir.SetAttribute(IR::Attribute::RenderTargetId, value); + break; + default: + UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), u32(attribute)); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 61954bec2..791f305b0 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -52,7 +52,7 @@ struct ExportRuntimeInfo { auto operator<=>(const ExportRuntimeInfo&) const noexcept = default; }; -enum class VsOutput : u8 { +enum class Output : u8 { None, 
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 61954bec2..791f305b0 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -52,7 +52,7 @@ struct ExportRuntimeInfo {
     auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
 };
 
-enum class VsOutput : u8 {
+enum class Output : u8 {
     None,
     PointSprite,
     EdgeFlag,
@@ -77,11 +77,11 @@ enum class VsOutput : u8 {
     ClipDist6,
     ClipDist7,
 };
-using VsOutputMap = std::array<VsOutput, 4>;
+using OutputMap = std::array<Output, 4>;
 
 struct VertexRuntimeInfo {
     u32 num_outputs;
-    std::array<VsOutputMap, 3> outputs;
+    std::array<OutputMap, 3> outputs;
     bool emulate_depth_negative_one_to_one{};
     bool clip_disable{};
     u32 step_rate_0;
@@ -145,6 +145,8 @@ struct HullRuntimeInfo {
 static constexpr auto GsMaxOutputStreams = 4u;
 using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
 struct GeometryRuntimeInfo {
+    u32 num_outputs;
+    std::array<OutputMap, 3> outputs;
     u32 num_invocations{};
     u32 output_vertices{};
     u32 in_vertex_data_size{};
@@ -179,7 +181,7 @@ struct PsColorBuffer {
     u32 pad : 20;
     AmdGpu::CompMapping swizzle;
 
-    auto operator<=>(const PsColorBuffer&) const noexcept = default;
+    bool operator==(const PsColorBuffer& other) const noexcept = default;
 };
 
 struct FragmentRuntimeInfo {
@@ -189,11 +191,11 @@ struct FragmentRuntimeInfo {
         bool is_flat;
         u8 default_value;
 
-        [[nodiscard]] bool IsDefault() const {
+        bool IsDefault() const {
            return is_default && !is_flat;
         }
 
-        auto operator<=>(const PsInput&) const noexcept = default;
+        bool operator==(const PsInput&) const noexcept = default;
     };
 
     AmdGpu::Liverpool::PsInput en_flags;
     AmdGpu::Liverpool::PsInput addr_flags;
diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h
index 7a9e32fc6..84e0fc2ee 100644
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -104,13 +104,18 @@ enum class NumberConversion : u32 {
     Uint32ToUnorm = 6,
 };
 
-struct CompMapping {
-    CompSwizzle r;
-    CompSwizzle g;
-    CompSwizzle b;
-    CompSwizzle a;
+union CompMapping {
+    struct {
+        CompSwizzle r;
+        CompSwizzle g;
+        CompSwizzle b;
+        CompSwizzle a;
+    };
+    std::array<CompSwizzle, 4> array;
 
-    auto operator<=>(const CompMapping& other) const = default;
+    bool operator==(const CompMapping& other) const {
+        return array == other.array;
+    }
 
     template <typename T>
     [[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
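Making CompMapping a union gives the swizzle both a named view (r/g/b/a) and an indexed view (array[i]) over the same storage. Defaulted comparisons are not available for unions, so it gains a hand-written operator==, and the structs embedding it (PsColorBuffer, PsInput) drop operator<=> for defaulted equality, which is all the pipeline key needs. A small usage sketch (values hypothetical):

```cpp
AmdGpu::CompMapping swizzle{};
swizzle.r = AmdGpu::CompSwizzle::Blue;      // named view...
const auto c0 = swizzle.array[0];           // ...aliased by the indexed view: Blue
const std::array<float, 4> rgba{1.f, 2.f, 3.f, 4.f};
const auto remapped = swizzle.Apply(rgba);  // remap components through the swizzle
```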
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index e4e628c69..d13aeec99 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -412,6 +412,7 @@ bool Instance::CreateDevice() {
             .hostQueryReset = vk12_features.hostQueryReset,
             .timelineSemaphore = vk12_features.timelineSemaphore,
             .bufferDeviceAddress = vk12_features.bufferDeviceAddress,
+            .shaderOutputLayer = vk12_features.shaderOutputLayer,
         },
         vk::PhysicalDeviceVulkan13Features{
             .robustImageAccess = vk13_features.robustImageAccess,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index bce16cbff..56f788ea2 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -23,8 +23,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
 
 namespace Vulkan {
 
 using Shader::LogicalStage;
+using Shader::Output;
 using Shader::Stage;
-using Shader::VsOutput;
 
 constexpr static auto SpirvVersion1_6 = 0x00010600U;
@@ -35,49 +35,55 @@ constexpr static std::array DescriptorHeapSizes = {
     vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
 };
 
-void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
-                         const AmdGpu::Liverpool::VsOutputControl& ctl) {
-    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
-        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
-            w != VsOutput::None) {
-            info.outputs[info.num_outputs++] = Shader::VsOutputMap{x, y, z, w};
-        }
-    };
-    // VS_OUT_MISC_VEC
-    add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
-               ctl.use_vtx_edge_flag
-                   ? VsOutput::EdgeFlag
-                   : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
-               ctl.use_vtx_kill_flag
-                   ? VsOutput::KillFlag
-                   : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
-               ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
-    // VS_OUT_CCDIST0
-    add_output(ctl.IsClipDistEnabled(0)
-                   ? VsOutput::ClipDist0
-                   : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
-               ctl.IsClipDistEnabled(1)
-                   ? VsOutput::ClipDist1
-                   : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
-               ctl.IsClipDistEnabled(2)
-                   ? VsOutput::ClipDist2
-                   : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
-               ctl.IsClipDistEnabled(3)
-                   ? VsOutput::ClipDist3
-                   : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
-    // VS_OUT_CCDIST1
-    add_output(ctl.IsClipDistEnabled(4)
-                   ? VsOutput::ClipDist4
-                   : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
-               ctl.IsClipDistEnabled(5)
-                   ? VsOutput::ClipDist5
-                   : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
-               ctl.IsClipDistEnabled(6)
-                   ? VsOutput::ClipDist6
-                   : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
-               ctl.IsClipDistEnabled(7)
-                   ? VsOutput::ClipDist7
-                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
+static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs,
+                      const AmdGpu::Liverpool::VsOutputControl& ctl) {
+    u32 num_outputs = 0;
+
+    if (ctl.vs_out_misc_enable) {
+        auto& misc_vec = outputs[num_outputs++];
+        misc_vec[0] = ctl.use_vtx_point_size ? Output::PointSprite : Output::None;
+        misc_vec[1] = ctl.use_vtx_edge_flag
+                          ? Output::EdgeFlag
+                          : (ctl.use_vtx_gs_cut_flag ? Output::GsCutFlag : Output::None);
+        misc_vec[2] = ctl.use_vtx_kill_flag
+                          ? Output::KillFlag
+                          : (ctl.use_vtx_render_target_idx ? Output::GsMrtIndex : Output::None);
+        misc_vec[3] = ctl.use_vtx_viewport_idx ? Output::GsVpIndex : Output::None;
+    }
+
+    if (ctl.vs_out_ccdist0_enable) {
+        auto& ccdist0 = outputs[num_outputs++];
+        ccdist0[0] = ctl.IsClipDistEnabled(0)
+                         ? Output::ClipDist0
+                         : (ctl.IsCullDistEnabled(0) ? Output::CullDist0 : Output::None);
+        ccdist0[1] = ctl.IsClipDistEnabled(1)
+                         ? Output::ClipDist1
+                         : (ctl.IsCullDistEnabled(1) ? Output::CullDist1 : Output::None);
+        ccdist0[2] = ctl.IsClipDistEnabled(2)
+                         ? Output::ClipDist2
+                         : (ctl.IsCullDistEnabled(2) ? Output::CullDist2 : Output::None);
+        ccdist0[3] = ctl.IsClipDistEnabled(3)
+                         ? Output::ClipDist3
+                         : (ctl.IsCullDistEnabled(3) ? Output::CullDist3 : Output::None);
+    }
+
+    if (ctl.vs_out_ccdist1_enable) {
+        auto& ccdist1 = outputs[num_outputs++];
+        ccdist1[0] = ctl.IsClipDistEnabled(4)
+                         ? Output::ClipDist4
+                         : (ctl.IsCullDistEnabled(4) ? Output::CullDist4 : Output::None);
+        ccdist1[1] = ctl.IsClipDistEnabled(5)
+                         ? Output::ClipDist5
+                         : (ctl.IsCullDistEnabled(5) ? Output::CullDist5 : Output::None);
+        ccdist1[2] = ctl.IsClipDistEnabled(6)
+                         ? Output::ClipDist6
+                         : (ctl.IsCullDistEnabled(6) ? Output::CullDist6 : Output::None);
+        ccdist1[3] = ctl.IsClipDistEnabled(7)
+                         ? Output::ClipDist7
+                         : (ctl.IsCullDistEnabled(7) ? Output::CullDist7 : Output::None);
+    }
+
+    return num_outputs;
 }
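MapOutputs keys each OutputMap row on the VS_OUT_*_ENA enables, so the row index tracks the POS1..POS3 export slot the hardware allocates per enabled group; the old add_output version instead dropped all-None groups, which could compact the indices. A worked example under a hypothetical register state:

```cpp
// Hypothetical: misc vector (viewport index only) and CCDIST0 (clip 0-3) enabled.
std::array<Shader::OutputMap, 3> outputs{};
const u32 num = MapOutputs(outputs, regs.vs_output_control);
// num == 2
// outputs[0] == {None, None, None, GsVpIndex}                 // VS_OUT_MISC_VEC -> POS1
// outputs[1] == {ClipDist0, ClipDist1, ClipDist2, ClipDist3}  // VS_OUT_CCDIST0  -> POS2
// An export to Position2 component 1 therefore lands in ClipDistance[1].
```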
@@ -116,9 +122,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
     }
     case Stage::Vertex: {
         BuildCommon(regs.vs_program);
-        GatherVertexOutputs(info.vs_info, regs.vs_output_control);
         info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
         info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
+        info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control);
         info.vs_info.emulate_depth_negative_one_to_one =
             !instance.IsDepthClipControlSupported() &&
             regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
@@ -133,6 +139,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
     case Stage::Geometry: {
         BuildCommon(regs.gs_program);
         auto& gs_info = info.gs_info;
+        gs_info.num_outputs = MapOutputs(gs_info.outputs, regs.vs_output_control);
         gs_info.output_vertices = regs.vgt_gs_max_vert_out;
         gs_info.num_invocations =
             regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
@@ -466,7 +473,8 @@ bool PipelineCache::RefreshGraphicsKey() {
             continue;
         }
 
-        if (!regs.color_target_mask.GetMask(cb) || (key.mrt_mask & (1u << cb)) == 0) {
+        const u32 target_mask = regs.color_target_mask.GetMask(cb);
+        if (!target_mask || (key.mrt_mask & (1u << cb)) == 0) {
             // Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
             // of the latter we need to change format to undefined, and either way we need to
             // increment the index for the null attachment binding.
@@ -477,7 +485,16 @@ bool PipelineCache::RefreshGraphicsKey() {
         key.blend_controls[remapped_cb] = regs.blend_control[cb];
         key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
                                                       !col_buf.info.blend_bypass);
-        key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
+        // Apply swizzle to target mask
+        for (u32 i = 0; i < 4; i++) {
+            if (target_mask & (1 << i)) {
+                const auto swizzled_comp =
+                    static_cast<u32>(key.color_buffers[remapped_cb].swizzle.array[i]);
+                constexpr u32 min_comp = static_cast<u32>(AmdGpu::CompSwizzle::Red);
+                const u32 comp = swizzled_comp >= min_comp ? swizzled_comp - min_comp : i;
+                key.write_masks[remapped_cb] |= vk::ColorComponentFlagBits{1u << comp};
+            }
+        }
         key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
         ++remapped_cb;
     }
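Because the translator now stores color components in swizzled positions (the remap moved into ExportRenderTarget's final loop), the fixed-function write mask must be remapped the same way before it becomes pipeline state. A worked example, assuming a BGRA-swizzled target:

```cpp
// target_mask bit 0 (R) enabled, swizzle.array == {Blue, Green, Red, Alpha}:
//   swizzled_comp - min_comp == u32(CompSwizzle::Blue) - u32(CompSwizzle::Red) == 2
// so the Vulkan mask gains bit 2 (eB) rather than bit 0 (eR):
const u32 comp = 2;
const auto bit = vk::ColorComponentFlagBits{1u << comp};
static_assert(static_cast<u32>(vk::ColorComponentFlagBits::eB) == 4u);
```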
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ec0c38bda..6829979e3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -112,6 +112,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
     RenderState state;
     state.width = instance.GetMaxFramebufferWidth();
     state.height = instance.GetMaxFramebufferHeight();
+    state.num_layers = std::numeric_limits<u32>::max();
 
     cb_descs.clear();
     db_desc.reset();
@@ -161,6 +162,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
         const auto mip = image_view.info.range.base.level;
         state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u));
         state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u));
+        state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers);
         state.color_attachments[state.num_color_attachments++] = {
             .imageView = *image_view.image_view,
             .imageLayout = vk::ImageLayout::eUndefined,
@@ -194,6 +196,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
         state.height = std::min(state.height, image.info.size.height);
         state.has_depth = regs.depth_buffer.DepthValid();
         state.has_stencil = regs.depth_buffer.StencilValid();
+        state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers);
         if (state.has_depth) {
             state.depth_attachment = {
                 .imageView = *image_view.image_view,
@@ -217,6 +220,10 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
         texture_cache.TouchMeta(htile_address, slice, false);
     }
 
+    if (state.num_layers == std::numeric_limits<u32>::max()) {
+        state.num_layers = 1;
+    }
+
     return state;
 }
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 7c3429297..910142232 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -4,6 +4,7 @@
 #include <mutex>
 #include "common/assert.h"
 #include "common/debug.h"
+#include "common/logging/log.h"
 #include "imgui/renderer/texture_manager.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -40,7 +41,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
             .offset = {0, 0},
             .extent = {render_state.width, render_state.height},
         },
-        .layerCount = 1,
+        .layerCount = render_state.num_layers,
         .colorAttachmentCount = render_state.num_color_attachments,
         .pColorAttachments = render_state.num_color_attachments > 0
                                  ? render_state.color_attachments.data()
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3616d8478..bd07a2676 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -24,6 +24,7 @@ struct RenderState {
     vk::RenderingAttachmentInfo depth_attachment{};
     vk::RenderingAttachmentInfo stencil_attachment{};
     u32 num_color_attachments{};
+    u32 num_layers{1};
     bool has_depth{};
     bool has_stencil{};
     u32 width{};
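On the runtime side, num_layers is seeded with a u32 sentinel, min-reduced over every bound attachment's layer count, and collapsed to 1 when nothing bound it; BeginRendering then opens the dynamic-rendering pass with that layerCount, so a shader writing RenderTargetId can address every slice of a layered attachment in one pass. The clamp pattern in isolation (a self-contained sketch, not project code):

```cpp
#include <algorithm>
#include <cstdint>
#include <initializer_list>
#include <limits>

uint32_t ClampLayers(std::initializer_list<uint32_t> attachment_layers) {
    uint32_t num_layers = std::numeric_limits<uint32_t>::max(); // sentinel: nothing bound yet
    for (const uint32_t layers : attachment_layers) {
        num_layers = std::min(num_layers, layers); // every attachment must cover the range
    }
    if (num_layers == std::numeric_limits<uint32_t>::max()) {
        num_layers = 1; // no attachments -> single layer
    }
    return num_layers;
}
// e.g. ClampLayers({6, 6}) == 6 for two cube-map targets; ClampLayers({}) == 1
```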