From 399a725343db2c8b07de94ec95789025bc91dd5c Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 14 Jul 2025 00:32:02 +0300 Subject: [PATCH 01/12] shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238) * shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates * flatten_extended_userdata: Remove special step rate buffer handling * Review comments * spirv_emit_context: Name all instance rate attribs properly * spirv: Merge ReadConstBuffer again template function only has 1 user now * attribute: Add missing attributes * translate: Reimplement step rate instance id * Resolve validation warnings * shader_recompiler: Separate vertex inputs from LS stage, cleanup tess --- .../spirv/emit_spirv_context_get_set.cpp | 128 +++++++----------- .../backend/spirv/emit_spirv_instructions.h | 2 +- .../backend/spirv/spirv_emit_context.cpp | 74 ++++------ .../backend/spirv/spirv_emit_context.h | 1 - src/shader_recompiler/frontend/fetch_shader.h | 12 -- .../frontend/translate/translate.cpp | 63 +++++---- src/shader_recompiler/info.h | 14 +- src/shader_recompiler/ir/attribute.cpp | 22 ++- src/shader_recompiler/ir/attribute.h | 2 - src/shader_recompiler/ir/ir_emitter.cpp | 4 +- src/shader_recompiler/ir/ir_emitter.h | 3 +- .../passes/flatten_extended_userdata_pass.cpp | 24 +--- .../ir/passes/ring_access_elimination.cpp | 11 +- src/shader_recompiler/ir/passes/srt.h | 13 +- src/shader_recompiler/runtime_info.h | 6 +- src/shader_recompiler/specialization.h | 20 ++- src/video_core/buffer_cache/buffer_cache.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +- .../renderer_vulkan/vk_instance.cpp | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 19 +-- .../renderer_vulkan/vk_rasterizer.cpp | 5 +- 22 files changed, 208 insertions(+), 274 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3a8c518c..40f8d307c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -52,7 +52,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)}; - if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + if (ctx.stage == Stage::Local) { const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index), ctx.ConstU32(element)); @@ -94,13 +94,9 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) { if (IR::IsParam(attr)) { - if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { - return {ctx.F32[1], false}; - } else { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - return {info.component_type, info.is_integer}; - } + const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; + const auto& info{ctx.output_params.at(index)}; + return {info.component_type, info.is_integer}; } if (IR::IsMrt(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)}; @@ -120,6 +116,9 @@ std::pair OutputAttrComponentType(EmitContext& 
ctx, IR::Attribute attr } } // Anonymous namespace +using PointerType = EmitContext::PointerType; +using PointerSize = EmitContext::PointerSize; + Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) { const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg); const u32 half = PushData::UdRegsIndex + (index >> 2); @@ -131,41 +130,6 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) { return ud_reg; } -void EmitGetThreadBitScalarReg(EmitContext& ctx) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitSetThreadBitScalarReg(EmitContext& ctx) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitGetScalarRegister(EmitContext&) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitSetScalarRegister(EmitContext&) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitGetVectorRegister(EmitContext& ctx) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitSetVectorRegister(EmitContext& ctx) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitSetGotoVariable(EmitContext&) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -void EmitGetGotoVariable(EmitContext&) { - UNREACHABLE_MSG("Unreachable instruction"); -} - -using PointerType = EmitContext::PointerType; -using PointerSize = EmitContext::PointerSize; - Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { const u32 flatbuf_off_dw = inst->Flags(); if (!Config::directMemoryAccess()) { @@ -180,39 +144,27 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { } } -template -Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { +Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { const auto& buffer = ctx.buffers[handle]; if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) { index = ctx.OpIAdd(ctx.U32[1], index, offset); } - const auto [id, pointer_type] = buffer.Alias(type); - const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1]; + const auto [id, pointer_type] = buffer.Alias(PointerType::U32); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; - const Id result{ctx.OpLoad(value_type, ptr)}; + const Id result{ctx.OpLoad(ctx.U32[1], ptr)}; if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) { const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size); - return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value); + return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value); } return result; } -Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { - return ReadConstBuffer(ctx, handle, index); -} - -Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { - const auto index{rate_idx == 0 ? 
PushData::Step0Index : PushData::Step1Index}; - return ctx.OpLoad( - ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), - ctx.push_data_block, ctx.ConstU32(index))); -} - -static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { +static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { if (IR::IsPosition(attr)) { ASSERT(attr == IR::Attribute::Position0); const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; + const auto pointer{ + ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -222,7 +174,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; const auto param = ctx.input_params.at(param_id).id; const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -230,7 +182,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 UNREACHABLE(); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); } else if (ctx.info.l_stage == LogicalStage::TessellationControl || @@ -248,18 +200,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { if (IR::IsParam(attr)) { const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(param_index)}; - if (param.buffer_handle >= 0) { - const auto step_rate = EmitReadStepRate(ctx, param.id.value); - const auto offset = ctx.OpIAdd( - ctx.U32[1], - ctx.OpIMul( - ctx.U32[1], - ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate), - ctx.ConstU32(param.num_components)), - ctx.ConstU32(comp)); - return ReadConstBuffer(ctx, param.buffer_handle, offset); - } - Id result; if (param.is_loaded) { // Attribute is either default or manually interpolated. 
The id points to an already @@ -305,10 +245,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); case IR::Attribute::InstanceId: return ctx.OpLoad(ctx.U32[1], ctx.instance_id); - case IR::Attribute::InstanceId0: - return EmitReadStepRate(ctx, 0); - case IR::Attribute::InstanceId1: - return EmitReadStepRate(ctx, 1); case IR::Attribute::WorkgroupIndex: return ctx.workgroup_index_id; case IR::Attribute::WorkgroupId: @@ -640,4 +576,36 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a UNREACHABLE_MSG("SPIR-V instruction"); } +void EmitGetThreadBitScalarReg(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetThreadBitScalarReg(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetScalarRegister(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetScalarRegister(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVectorRegister(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVectorRegister(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetGotoVariable(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetGotoVariable(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 74c94754d..37d5d84c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -108,7 +108,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id cmp_value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 6a731d05c..e16bba755 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -377,35 +377,13 @@ void EmitContext::DefineInputs() { ASSERT(attrib.semantic < IR::NumParams); const auto sharp = attrib.GetSharp(info); const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]}; - if (attrib.UsesStepRates()) { - const u32 rate_idx = - attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 
0 - : 1; - const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt()); - const auto buffer = - std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) { - return buffer.instance_attrib == attrib.semantic; - }); - // Note that we pass index rather than Id - input_params[attrib.semantic] = SpirvAttribute{ - .id = {rate_idx}, - .pointer_type = input_u32, - .component_type = U32[1], - .num_components = std::min(attrib.num_elements, num_components), - .is_integer = true, - .is_loaded = false, - .buffer_handle = int(buffer - info.buffers.begin()), - }; + Id id{DefineInput(type, attrib.semantic)}; + if (attrib.GetStepRate() != Gcn::VertexAttribute::InstanceIdType::None) { + Name(id, fmt::format("vs_instance_attr{}", attrib.semantic)); } else { - Id id{DefineInput(type, attrib.semantic)}; - if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) { - Name(id, fmt::format("vs_instance_attr{}", attrib.semantic)); - } else { - Name(id, fmt::format("vs_in_attr{}", attrib.semantic)); - } - input_params[attrib.semantic] = - GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); + Name(id, fmt::format("vs_in_attr{}", attrib.semantic)); } + input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); } break; } @@ -573,7 +551,7 @@ void EmitContext::DefineOutputs() { cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); } - if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) { + if (stage == Stage::Local) { const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4; if (num_attrs > 0) { const Id type{TypeArray(F32[4], ConstU32(num_attrs))}; @@ -700,12 +678,10 @@ void EmitContext::DefineOutputs() { void EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates - const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4], - U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]), + const Id struct_type{Name(TypeStruct(F32[1], F32[1], F32[1], F32[1], U32[4], U32[4], U32[4], + U32[4], U32[4], U32[4], U32[2]), "AuxData")}; Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, PushData::Step0Index, "sr0"); - MemberName(struct_type, PushData::Step1Index, "sr1"); MemberName(struct_type, PushData::XOffsetIndex, "xoffset"); MemberName(struct_type, PushData::YOffsetIndex, "yoffset"); MemberName(struct_type, PushData::XScaleIndex, "xscale"); @@ -717,19 +693,17 @@ void EmitContext::DefinePushDataBlock() { MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0"); MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1"); MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2"); - MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U); - MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U); - MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U); - MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U); - MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U); - MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U); - MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U); - MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U); - MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U); - MemberDecorate(struct_type, 
PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U); - MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U); - MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U); - MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U); + MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 0U); + MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 4U); + MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 8U); + MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 12U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 16U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 32U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 48U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 64U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 80U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 96U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 112U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); @@ -763,19 +737,19 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte Decorate(id, spv::Decoration::NonWritable); } switch (buffer_type) { - case Shader::BufferType::GdsBuffer: + case BufferType::GdsBuffer: Name(id, "gds_buffer"); break; - case Shader::BufferType::Flatbuf: + case BufferType::Flatbuf: Name(id, "srt_flatbuf"); break; - case Shader::BufferType::BdaPagetable: + case BufferType::BdaPagetable: Name(id, "bda_pagetable"); break; - case Shader::BufferType::FaultBuffer: + case BufferType::FaultBuffer: Name(id, "fault_buffer"); break; - case Shader::BufferType::SharedMemory: + case BufferType::SharedMemory: Name(id, "ssbo_shmem"); break; default: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 28e9099d8..186925706 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -361,7 +361,6 @@ public: u32 num_components; bool is_integer{}; bool is_loaded{}; - s32 buffer_handle{-1}; }; Id input_attr_array; Id output_attr_array; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 837caafa0..e77925232 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include "common/types.h" #include "shader_recompiler/info.h" @@ -29,11 +28,6 @@ struct VertexAttribute { return static_cast(instance_data); } - [[nodiscard]] bool UsesStepRates() const { - const auto step_rate = GetStepRate(); - return step_rate == OverStepRate0 || step_rate == OverStepRate1; - } - [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept { return info.ReadUdReg(sgpr_base, dword_offset); } @@ -52,12 +46,6 @@ struct FetchShaderData { s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR - [[nodiscard]] bool UsesStepRates() const { - return 
std::ranges::find_if(attributes, [](const VertexAttribute& attribute) { - return attribute.UsesStepRates(); - }) != attributes.end(); - } - bool operator==(const FetchShaderData& other) const { return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && instance_offset_sgpr == other.instance_offset_sgpr; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 5853f3e72..310ac9156 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -90,17 +90,40 @@ void Translator::EmitPrologue(IR::Block* first_block) { case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); - // v1: instance ID, step rate 0 - if (runtime_info.num_input_vgprs > 0) { - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0)); - } - // v2: instance ID, step rate 1 - if (runtime_info.num_input_vgprs > 1) { - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1)); - } - // v3: instance ID, plain - if (runtime_info.num_input_vgprs > 2) { - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); + if (info.stage == Stage::Local) { + // v1: rel patch ID + if (runtime_info.num_input_vgprs > 0) { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); + } + // v2: instance ID + if (runtime_info.num_input_vgprs > 1) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); + } + } else { + // v1: instance ID, step rate 0 + if (runtime_info.num_input_vgprs > 0) { + if (runtime_info.vs_info.step_rate_0 != 0) { + ir.SetVectorReg(dst_vreg++, + ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId), + ir.Imm32(runtime_info.vs_info.step_rate_0))); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); + } + } + // v2: instance ID, step rate 1 + if (runtime_info.num_input_vgprs > 1) { + if (runtime_info.vs_info.step_rate_1 != 0) { + ir.SetVectorReg(dst_vreg++, + ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId), + ir.Imm32(runtime_info.vs_info.step_rate_1))); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); + } + } + // v3: instance ID, plain + if (runtime_info.num_input_vgprs > 2) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); + } } break; case LogicalStage::Fragment: @@ -183,10 +206,8 @@ void Translator::EmitPrologue(IR::Block* first_block) { switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 - [[fallthrough]]; case AmdGpu::GsOutputPrimitiveType::LineStrip: ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1 - [[fallthrough]]; default: ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0 break; @@ -481,11 +502,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra } void Translator::EmitFetch(const GcnInst& inst) { - // Read the pointer to the fetch shader assembly. const auto code_sgpr_base = inst.src[0].code; + + // The fetch shader must be inlined to access as regular buffers, so that + // bounds checks can be emitted to emulate robust buffer access. if (!profile.supports_robust_buffer_access) { - // The fetch shader must be inlined to access as regular buffers, so that - // bounds checks can be emitted to emulate robust buffer access. 
const auto* code = GetFetchShaderCode(info, code_sgpr_base); GcnCodeSlice slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; @@ -535,16 +556,6 @@ void Translator::EmitFetch(const GcnInst& inst) { for (u32 i = 0; i < 4; i++) { ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } - - // In case of programmable step rates we need to fallback to instance data pulling in - // shader, so VBs should be bound as regular data buffers - if (attrib.UsesStepRates()) { - info.buffers.push_back({ - .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), - .used_types = IR::Type::F32, - .instance_attrib = attrib.semantic, - }); - } } } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 9703643e8..6e12c6816 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -113,17 +113,13 @@ struct FMaskResource { using FMaskResourceList = boost::container::small_vector; struct PushData { - static constexpr u32 Step0Index = 0; - static constexpr u32 Step1Index = 1; - static constexpr u32 XOffsetIndex = 2; - static constexpr u32 YOffsetIndex = 3; - static constexpr u32 XScaleIndex = 4; - static constexpr u32 YScaleIndex = 5; - static constexpr u32 UdRegsIndex = 6; + static constexpr u32 XOffsetIndex = 0; + static constexpr u32 YOffsetIndex = 1; + static constexpr u32 XScaleIndex = 2; + static constexpr u32 YScaleIndex = 3; + static constexpr u32 UdRegsIndex = 4; static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4; - u32 step0; - u32 step1; float xoffset; float yoffset; float xscale; diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 6a267e21b..b2f11d141 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -100,22 +100,36 @@ std::string NameOf(Attribute attribute) { return "Param30"; case Attribute::Param31: return "Param31"; + case Attribute::ClipDistance: + return "ClipDistanace"; + case Attribute::CullDistance: + return "CullDistance"; + case Attribute::RenderTargetId: + return "RenderTargetId"; + case Attribute::ViewportId: + return "ViewportId"; case Attribute::VertexId: return "VertexId"; - case Attribute::InstanceId: - return "InstanceId"; case Attribute::PrimitiveId: return "PrimitiveId"; - case Attribute::FragCoord: - return "FragCoord"; + case Attribute::InstanceId: + return "InstanceId"; case Attribute::IsFrontFace: return "IsFrontFace"; + case Attribute::SampleIndex: + return "SampleIndex"; + case Attribute::GlobalInvocationId: + return "GlobalInvocationId"; case Attribute::WorkgroupId: return "WorkgroupId"; + case Attribute::WorkgroupIndex: + return "WorkgroupIndex"; case Attribute::LocalInvocationId: return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::FragCoord: + return "FragCoord"; case Attribute::InvocationId: return "InvocationId"; case Attribute::PatchVertices: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 68472f052..b6b1c8b59 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -73,8 +73,6 @@ enum class Attribute : u64 { LocalInvocationId = 76, LocalInvocationIndex = 77, FragCoord = 78, - InstanceId0 = 79, // step rate 0 - InstanceId1 = 80, // step rate 1 InvocationId = 81, // TCS id in output patch and instanced geometry shader id PatchVertices = 82, TessellationEvaluationPointU = 83, diff --git 
a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 4997145d7..6ca86b2c0 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp), index); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 6055df565..a105b042d 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -81,8 +81,7 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, - IR::Value index = IR::Value(u32(0u))); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 7253e18c1..e0c99655d 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -191,7 +191,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { Xbyak::CodeGenerator& c = g_srt_codegen; - if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) { + if (pass_info.srt_roots.empty()) { return; } @@ -205,29 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { } info.srt_info.walker_func = c.getCurr(); - pass_info.dst_off_dw = NumUserDataRegs; - - // Special case for V# step rate buffers in fetch shader - for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) { - // get pointer to V# - if (sgpr_base != IR::NumScalarRegs) { - PushPtr(c, sgpr_base); - } - u32 src_off = dword_offset << 2; - - for (auto j = 0; j < num_dwords; j++) { - c.mov(r11d, ptr[rdi + src_off]); - c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d); - - src_off += 4; - ++pass_info.dst_off_dw; - } - if (sgpr_base != IR::NumScalarRegs) { - PopPtr(c); - } - } - ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); for (const auto& [sgpr_base, root] : pass_info.srt_roots) { diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index b292b41b9..e1e5d762c 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -33,12 +33,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim bool is_composite = opcode == IR::Opcode::WriteSharedU64; u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 
1 : 2; - u32 offset = 0; - const auto* addr = inst.Arg(0).InstRecursive(); - if (addr->GetOpcode() == IR::Opcode::IAdd32) { - ASSERT(addr->Arg(1).IsImmediate()); - offset = addr->Arg(1).U32(); - } + ASSERT(inst.Arg(0).IsImmediate()); + + u32 offset = inst.Arg(0).U32(); IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()}) : inst.Arg(1).Resolve(); for (s32 i = 0; i < num_components; i++) { @@ -116,7 +113,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim } const auto shl_inst = inst.Arg(1).TryInstRecursive(); - const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2); + const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); const auto bucket = offset.Resolve().U32() / 256u; const auto attrib = bucket < 4 ? IR::Attribute::Position0 diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h index 0ddc15ea6..4dce38674 100644 --- a/src/shader_recompiler/ir/passes/srt.h +++ b/src/shader_recompiler/ir/passes/srt.h @@ -20,18 +20,7 @@ struct PersistentSrtInfo { }; PFN_SrtWalker walker_func{}; - boost::container::small_vector srt_reservations; u32 flattened_bufsize_dw = 16; // NumUserDataRegs - - // Special case for fetch shaders because we don't generate IR to read from step rate buffers, - // so we won't see usage with GetUserData/ReadConst. - // Reserve space in the flattened buffer for a sharp ahead of time - u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) { - u32 rv = flattened_bufsize_dw; - srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords); - flattened_bufsize_dw += num_dwords; - return rv; - } }; -} // namespace Shader \ No newline at end of file +} // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 5a0408e2c..6cede44a8 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -42,7 +42,6 @@ constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); struct LocalRuntimeInfo { u32 ls_stride; - bool links_with_tcs; auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; }; @@ -85,6 +84,8 @@ struct VertexRuntimeInfo { std::array outputs; bool emulate_depth_negative_one_to_one{}; bool clip_disable{}; + u32 step_rate_0; + u32 step_rate_1; // Domain AmdGpu::TessellationType tess_type; AmdGpu::TessellationTopology tess_topology; @@ -96,7 +97,8 @@ struct VertexRuntimeInfo { clip_disable == other.clip_disable && tess_type == other.tess_type && tess_topology == other.tess_topology && tess_partitioning == other.tess_partitioning && - hs_output_cp_stride == other.hs_output_cp_stride; + hs_output_cp_stride == other.hs_output_cp_stride && + step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1; } void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index e40309aaf..d3e671c58 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -13,7 +13,7 @@ namespace Shader { struct VsAttribSpecialization { - s32 num_components{}; + u32 divisor{}; AmdGpu::NumberClass num_class{}; AmdGpu::CompMapping dst_select{}; @@ -74,13 +74,13 @@ struct SamplerSpecialization { * after the first compilation of a module. 
*/ struct StageSpecialization { - static constexpr size_t MaxStageResources = 64; + static constexpr size_t MaxStageResources = 128; const Shader::Info* info; RuntimeInfo runtime_info; + std::bitset bitset{}; std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; - std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector images; boost::container::small_vector fmasks; @@ -94,10 +94,16 @@ struct StageSpecialization { if (info_.stage == Stage::Vertex && fetch_shader_data) { // Specialize shader on VS input number types to follow spec. ForEachSharp(vs_attribs, fetch_shader_data->attributes, - [&profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.num_components = desc.UsesStepRates() - ? AmdGpu::NumComponents(sharp.GetDataFmt()) - : 0; + [&profile_, this](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType; + if (const auto step_rate = desc.GetStepRate(); + step_rate != InstanceIdType::None) { + spec.divisor = step_rate == InstanceIdType::OverStepRate0 + ? runtime_info.vs_info.step_rate_0 + : (step_rate == InstanceIdType::OverStepRate1 + ? runtime_info.vs_info.step_rate_1 + : 1); + } spec.num_class = profile_.support_legacy_vertex_attributes ? AmdGpu::NumberClass{} : AmdGpu::GetNumberClass(sharp.GetNumberFmt()); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 28444ac60..42e3c61a5 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { + const auto& regs = liverpool->regs; Vulkan::VertexInputs attributes; Vulkan::VertexInputs bindings; + Vulkan::VertexInputs divisors; Vulkan::VertexInputs guest_buffers; - pipeline.GetVertexInputs(attributes, bindings, guest_buffers); + pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers, + regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1); if (instance.IsVertexInputDynamicState()) { // Update current vertex inputs. diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7c020a012..3596bb041 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline( VertexInputs vertex_attributes; VertexInputs vertex_bindings; + VertexInputs divisors; VertexInputs guest_buffers; if (!instance.IsVertexInputDynamicState()) { - GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers); + const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info; + GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers, + vs_info.step_rate_0, vs_info.step_rate_1); } + const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = { + .vertexBindingDivisorCount = static_cast(divisors.size()), + .pVertexBindingDivisors = divisors.data(), + }; + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .pNext = divisors.empty() ? 
nullptr : &divisor_state, .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), .pVertexBindingDescriptions = vertex_bindings.data(), .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), @@ -304,19 +313,17 @@ GraphicsPipeline::GraphicsPipeline( GraphicsPipeline::~GraphicsPipeline() = default; template -void GraphicsPipeline::GetVertexInputs(VertexInputs& attributes, - VertexInputs& bindings, - VertexInputs& guest_buffers) const { +void GraphicsPipeline::GetVertexInputs( + VertexInputs& attributes, VertexInputs& bindings, + VertexInputs& divisors, + VertexInputs& guest_buffers, u32 step_rate_0, u32 step_rate_1) const { + using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType; if (!fetch_shader || fetch_shader->attributes.empty()) { return; } const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { - if (attrib.UsesStepRates()) { - // Skip attribute binding as the data will be pulled by shader. - continue; - } - + const auto step_rate = attrib.GetStepRate(); const auto& buffer = attrib.GetSharp(vs_info); attributes.push_back(Attribute{ .location = attrib.semantic, @@ -327,12 +334,19 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs& attributes, bindings.push_back(Binding{ .binding = attrib.semantic, .stride = buffer.GetStride(), - .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None - ? vk::VertexInputRate::eVertex - : vk::VertexInputRate::eInstance, + .inputRate = step_rate == InstanceIdType::None ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, }); + const u32 divisor = step_rate == InstanceIdType::OverStepRate0 + ? step_rate_0 + : (step_rate == InstanceIdType::OverStepRate1 ? step_rate_1 : 1); if constexpr (std::is_same_v) { - bindings.back().divisor = 1; + bindings.back().divisor = divisor; + } else if (step_rate != InstanceIdType::None) { + divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{ + .binding = attrib.semantic, + .divisor = divisor, + }); } guest_buffers.emplace_back(buffer); } @@ -342,11 +356,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs& attributes, template void GraphicsPipeline::GetVertexInputs( VertexInputs& attributes, VertexInputs& bindings, - VertexInputs& guest_buffers) const; + VertexInputs& divisors, + VertexInputs& guest_buffers, u32 step_rate_0, u32 step_rate_1) const; template void GraphicsPipeline::GetVertexInputs( VertexInputs& attributes, VertexInputs& bindings, - VertexInputs& guest_buffers) const; + VertexInputs& divisors, + VertexInputs& guest_buffers, u32 step_rate_0, u32 step_rate_1) const; void GraphicsPipeline::BuildDescSetLayout() { boost::container::small_vector bindings; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 59230ae46..ab67a52b4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -81,7 +81,9 @@ public: /// Gets the attributes and bindings for vertex inputs. 
template void GetVertexInputs(VertexInputs& attributes, VertexInputs& bindings, - VertexInputs& guest_buffers) const; + VertexInputs& divisors, + VertexInputs& guest_buffers, u32 step_rate_0, + u32 step_rate_1) const; private: void BuildDescSetLayout(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 237fa202d..85fc993a9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -248,6 +248,7 @@ bool Instance::CreateDevice() { // Required ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME)); ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME)); // Optional depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); @@ -436,6 +437,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ .legacyVertexAttributes = true, }, + vk::PhysicalDeviceVertexAttributeDivisorFeatures{ + .vertexAttributeInstanceRateDivisor = true, + }, vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{ .shaderBufferFloat32AtomicMinMax = shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7dd468f9a..8d12b74f3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -94,15 +94,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS switch (stage) { case Stage::Local: { BuildCommon(regs.ls_program); - if (regs.stage_enable.IsStageEnabled(static_cast(Stage::Hull))) { - info.ls_info.links_with_tcs = true; - Shader::TessellationDataConstantBuffer tess_constants; - const auto* pgm = regs.ProgramForStage(static_cast(Stage::Hull)); - const auto params = Liverpool::GetParams(*pgm); - const auto& hull_info = program_cache.at(params.hash)->info; - hull_info.ReadTessConstantBuffer(tess_constants); - info.ls_info.ls_stride = tess_constants.ls_stride; - } + Shader::TessellationDataConstantBuffer tess_constants; + const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)]; + hull_info->ReadTessConstantBuffer(tess_constants); + info.ls_info.ls_stride = tess_constants.ls_stride; break; } case Stage::Hull: { @@ -122,6 +117,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); + info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0; + info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1; info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; @@ -460,10 +457,6 @@ bool PipelineCache::RefreshGraphicsKey() { // Stride will still be handled outside the pipeline using dynamic state. u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { - if (attrib.UsesStepRates()) { - // Skip attribute binding as the data will be pulled by shader. 
- continue; - } const auto& buffer = attrib.GetSharp(*vs_info); ASSERT(vertex_binding < MaxVertexBufferCount); key.vertex_buffer_formats[vertex_binding++] = diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5d0a14ce3..2a645f338 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -20,12 +20,9 @@ namespace Vulkan { static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) { - Shader::PushData push_data{}; - push_data.step0 = regs.vgt_instance_step_rate_0; - push_data.step1 = regs.vgt_instance_step_rate_1; - // TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex // is encountered and implemented in the recompiler. + Shader::PushData push_data{}; push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f; push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f; push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f; From 00f4eeddaf080ec65c9ac6d63141577c81819449 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 14 Jul 2025 21:23:18 +0300 Subject: [PATCH 02/12] renderer_vulkan: Handle more miscellaneous GPU settings (#3241) * renderer_vulkan: Respect provoking vertex setting * renderer_vulkan: Handle rasterization discard * renderer_vulkan: Implement logic ops * renderer_vulkan: Properly implement depth clamp and clip * renderer_vulkan: Handle line width * Fix build * vk_pipeline_cache: Don't check depth clamp without a depth buffer * liverpool: Fix line control offset * vk_pipeline_cache: Don't run search if depth clamp is disabled * vk_pipeline_cache: Allow using viewport range when it's more restrictive then depth clamp * liverpool: Disable depth clip when near and far planes have different setting * vk_graphics_pipeline: Move warning to pipeline * vk_pipeline_cache: Revert viewport check and remove log * vk_graphics_pipeline: Enable depth clamp when depth clip is disabled and extension is not supported Without the depth clip extension depth clipping is controlled by depth clamping --- src/video_core/amdgpu/liverpool.h | 51 ++++++++--- .../renderer_vulkan/liverpool_to_vk.cpp | 40 +++++++++ .../renderer_vulkan/liverpool_to_vk.h | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 79 +++++++++++++---- .../renderer_vulkan/vk_graphics_pipeline.h | 32 ++++--- .../renderer_vulkan/vk_instance.cpp | 23 +++++ src/video_core/renderer_vulkan/vk_instance.h | 23 +++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 85 +++++++++++++++---- .../renderer_vulkan/vk_pipeline_cache.h | 2 + .../renderer_vulkan/vk_rasterizer.cpp | 16 ++-- .../renderer_vulkan/vk_rasterizer.h | 1 + .../renderer_vulkan/vk_scheduler.cpp | 10 ++- src/video_core/renderer_vulkan/vk_scheduler.h | 26 +++++- 13 files changed, 323 insertions(+), 67 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c07e9f63a..c517285fb 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -304,6 +304,14 @@ struct Liverpool { } }; + struct LineControl { + u32 width_fixed_point; + + float Width() const { + return static_cast(width_fixed_point) / 8.0; + } + }; + struct ModeControl { s32 msaa_enable : 1; s32 vport_scissor_enable : 1; @@ -513,9 +521,16 @@ struct Liverpool { BitField<19, 1, ClipSpace> clip_space; BitField<21, 1, PrimKillCond> vtx_kill_or; BitField<22, 1, u32> 
dx_rasterization_kill; - BitField<23, 1, u32> dx_linear_attr_clip_enable; + BitField<24, 1, u32> dx_linear_attr_clip_enable; BitField<26, 1, u32> zclip_near_disable; - BitField<26, 1, u32> zclip_far_disable; + BitField<27, 1, u32> zclip_far_disable; + + bool ZclipEnable() const { + if (zclip_near_disable != zclip_far_disable) { + return false; + } + return !zclip_near_disable; + } }; enum class PolygonMode : u32 { @@ -738,12 +753,7 @@ struct Liverpool { u32 data_w; }; - struct BlendConstants { - float red; - float green; - float blue; - float alpha; - }; + using BlendConstants = std::array; union BlendControl { enum class BlendFactor : u32 { @@ -796,11 +806,29 @@ struct Liverpool { Err = 4u, FmaskDecompress = 5u, }; + enum class LogicOp : u32 { + Clear = 0x00, + Nor = 0x11, + AndInverted = 0x22, + CopyInverted = 0x33, + AndReverse = 0x44, + Invert = 0x55, + Xor = 0x66, + Nand = 0x77, + And = 0x88, + Equiv = 0x99, + Noop = 0xAA, + OrInverted = 0xBB, + Copy = 0xCC, + OrReverse = 0xDD, + Or = 0xEE, + Set = 0xFF, + }; BitField<0, 1, u32> disable_dual_quad; BitField<3, 1, u32> degamma_enable; BitField<4, 3, OperationMode> mode; - BitField<16, 8, u32> rop3; + BitField<16, 8, LogicOp> rop3; }; struct ColorBuffer { @@ -1369,7 +1397,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 6); + LineControl line_control; + INSERT_PADDING_WORDS(4); HsTessFactorClamp hs_clamp; INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; @@ -1695,6 +1725,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282); static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 5972296c0..fd1a91260 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -245,6 +245,46 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { } } +vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) { + using LogicOp = Liverpool::ColorControl::LogicOp; + switch (logic_op) { + case LogicOp::Clear: + return vk::LogicOp::eClear; + case LogicOp::Nor: + return vk::LogicOp::eNor; + case LogicOp::AndInverted: + return vk::LogicOp::eAndInverted; + case LogicOp::CopyInverted: + return vk::LogicOp::eCopyInverted; + case LogicOp::AndReverse: + return vk::LogicOp::eAndReverse; + case LogicOp::Invert: + return vk::LogicOp::eInvert; + case LogicOp::Xor: + return vk::LogicOp::eXor; + case LogicOp::Nand: + return vk::LogicOp::eNand; + case LogicOp::And: + return vk::LogicOp::eAnd; + case LogicOp::Equiv: + return vk::LogicOp::eEquivalent; + case LogicOp::Noop: + return vk::LogicOp::eNoOp; + case LogicOp::OrInverted: + return vk::LogicOp::eOrInverted; + case LogicOp::Copy: + return vk::LogicOp::eCopy; + case LogicOp::OrReverse: + return vk::LogicOp::eOrReverse; + case LogicOp::Or: + return vk::LogicOp::eOr; + case LogicOp::Set: + return vk::LogicOp::eSet; + default: + UNREACHABLE_MSG("Unknown logic op {}", u32(logic_op)); + } +} + // 
https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21 vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) { switch (mode) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 61fd4a8c1..61b7ea0a9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -34,6 +34,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor); vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); +vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op); + vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3596bb041..10e5bed5f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -109,28 +109,63 @@ GraphicsPipeline::GraphicsPipeline( .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points), }; - const vk::PipelineRasterizationStateCreateInfo raster_state = { - .depthClampEnable = false, - .rasterizerDiscardEnable = false, - .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), - .lineWidth = 1.0f, + vk::StructureChain raster_chain = { + vk::PipelineRasterizationStateCreateInfo{ + .depthClampEnable = key.depth_clamp_enable || + (!key.depth_clip_enable && !instance.IsDepthClipEnableSupported()), + .rasterizerDiscardEnable = false, + .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), + .lineWidth = 1.0f, + }, + vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{ + .provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First + ? vk::ProvokingVertexModeEXT::eFirstVertex + : vk::ProvokingVertexModeEXT::eLastVertex, + }, + vk::PipelineRasterizationDepthClipStateCreateInfoEXT{ + .depthClipEnable = key.depth_clip_enable, + }, }; + if (!instance.IsProvokingVertexSupported()) { + raster_chain.unlink(); + } + if (!instance.IsDepthClipEnableSupported()) { + raster_chain.unlink(); + } + const vk::PipelineMultisampleStateCreateInfo multisampling = { .rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()), .sampleShadingEnable = false, }; - const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { - .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, + const vk::DepthClampRangeEXT depth_clamp_range = { + .minDepthClamp = key.min_depth_clamp, + .maxDepthClamp = key.max_depth_clamp, }; - const vk::PipelineViewportStateCreateInfo viewport_info = { - .pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr, + vk::StructureChain viewport_chain = { + vk::PipelineViewportStateCreateInfo{}, + vk::PipelineViewportDepthClipControlCreateInfoEXT{ + .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, + }, + vk::PipelineViewportDepthClampControlCreateInfoEXT{ + .depthClampMode = key.depth_clamp_user_defined_range + ? 
vk::DepthClampModeEXT::eUserDefinedRange + : vk::DepthClampModeEXT::eViewportRange, + .pDepthClampRange = &depth_clamp_range, + }, }; - boost::container::static_vector dynamic_states = { + if (!instance.IsDepthClampControlSupported()) { + viewport_chain.unlink(); + } + if (!instance.IsDepthClipControlSupported()) { + viewport_chain.unlink(); + } + + boost::container::static_vector dynamic_states = { vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount, vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable, vk::DynamicState::eDepthWriteEnable, vk::DynamicState::eDepthCompareOp, @@ -138,7 +173,8 @@ GraphicsPipeline::GraphicsPipeline( vk::DynamicState::eStencilTestEnable, vk::DynamicState::eStencilReference, vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilOp, vk::DynamicState::eCullMode, - vk::DynamicState::eFrontFace, + vk::DynamicState::eFrontFace, vk::DynamicState::eRasterizerDiscardEnable, + vk::DynamicState::eLineWidth, }; if (instance.IsPrimitiveRestartDisableSupported()) { @@ -221,11 +257,19 @@ GraphicsPipeline::GraphicsPipeline( }); } + const auto depth_format = + instance.GetSupportedFormat(LiverpoolToVK::DepthFormat(key.z_format, key.stencil_format), + vk::FormatFeatureFlagBits2::eDepthStencilAttachment); const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = { .colorAttachmentCount = key.num_color_attachments, .pColorAttachmentFormats = key.color_formats.data(), - .depthAttachmentFormat = key.depth_format, - .stencilAttachmentFormat = key.stencil_format, + .depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid + ? depth_format + : vk::Format::eUndefined, + .stencilAttachmentFormat = + key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid + ? depth_format + : vk::Format::eUndefined, }; std::array attachments; @@ -280,8 +324,9 @@ GraphicsPipeline::GraphicsPipeline( } const vk::PipelineColorBlendStateCreateInfo color_blending = { - .logicOpEnable = false, - .logicOp = vk::LogicOp::eCopy, + .logicOpEnable = + instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy, + .logicOp = LiverpoolToVK::LogicOp(key.logic_op), .attachmentCount = key.num_color_attachments, .pAttachments = attachments.data(), .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, @@ -294,8 +339,8 @@ GraphicsPipeline::GraphicsPipeline( .pVertexInputState = !instance.IsVertexInputDynamicState() ? 
&vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, .pTessellationState = &tessellation_state, - .pViewportState = &viewport_info, - .pRasterizationState = &raster_state, + .pViewportState = &viewport_chain.get(), + .pRasterizationState = &raster_chain.get(), .pMultisampleState = &multisampling, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ab67a52b4..1ecfa6b42 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -33,22 +33,32 @@ using VertexInputs = boost::container::static_vector; struct GraphicsPipelineKey { std::array stage_hashes; + std::array vertex_buffer_formats; + u32 patch_control_points; u32 num_color_attachments; std::array color_formats; std::array color_buffers; - vk::Format depth_format; - vk::Format stencil_format; - - u32 num_samples; - u32 mrt_mask; - AmdGpu::PrimitiveType prim_type; - Liverpool::PolygonMode polygon_mode; - Liverpool::ClipSpace clip_space; - Liverpool::ColorBufferMask cb_shader_mask; std::array blend_controls; std::array write_masks; - std::array vertex_buffer_formats; - u32 patch_control_points; + Liverpool::ColorBufferMask cb_shader_mask; + Liverpool::ColorControl::LogicOp logic_op; + u32 num_samples; + u32 mrt_mask; + struct { + Liverpool::DepthBuffer::ZFormat z_format : 2; + Liverpool::DepthBuffer::StencilFormat stencil_format : 1; + u32 depth_clamp_enable : 1; + u32 depth_clamp_user_defined_range : 1; + float min_depth_clamp; + float max_depth_clamp; + }; + struct { + AmdGpu::PrimitiveType prim_type : 5; + Liverpool::PolygonMode polygon_mode : 2; + Liverpool::ClipSpace clip_space : 1; + Liverpool::ProvokingVtxLast provoking_vtx_last : 1; + u32 depth_clip_enable : 1; + }; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 85fc993a9..3a461b321 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -270,10 +270,13 @@ bool Instance::CreateDevice() { } custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME); + depth_clamp_control = add_extension(VK_EXT_DEPTH_CLAMP_CONTROL_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); + provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME); @@ -362,9 +365,11 @@ bool Instance::CreateDevice() { .dualSrcBlend = features.dualSrcBlend, .logicOp = features.logicOp, .multiDrawIndirect = features.multiDrawIndirect, + .depthClamp = features.depthClamp, .depthBiasClamp = features.depthBiasClamp, 
.fillModeNonSolid = features.fillModeNonSolid, .depthBounds = features.depthBounds, + .wideLines = features.wideLines, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics, @@ -418,6 +423,12 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceDepthClipControlFeaturesEXT{ .depthClipControl = true, }, + vk::PhysicalDeviceDepthClipEnableFeaturesEXT{ + .depthClipEnable = true, + }, + vk::PhysicalDeviceDepthClampControlFeaturesEXT{ + .depthClampControl = true, + }, vk::PhysicalDeviceRobustness2FeaturesEXT{ .robustBufferAccess2 = robustness2_features.robustBufferAccess2, .robustImageAccess2 = robustness2_features.robustImageAccess2, @@ -437,6 +448,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ .legacyVertexAttributes = true, }, + vk::PhysicalDeviceProvokingVertexFeaturesEXT{ + .provokingVertexLast = true, + }, vk::PhysicalDeviceVertexAttributeDivisorFeatures{ .vertexAttributeInstanceRateDivisor = true, }, @@ -487,6 +501,12 @@ bool Instance::CreateDevice() { if (!depth_clip_control) { device_chain.unlink(); } + if (!depth_clip_enable) { + device_chain.unlink(); + } + if (!depth_clamp_control) { + device_chain.unlink(); + } if (!robustness2) { device_chain.unlink(); } @@ -502,6 +522,9 @@ bool Instance::CreateDevice() { if (!legacy_vertex_attributes) { device_chain.unlink(); } + if (!provoking_vertex) { + device_chain.unlink(); + } if (!shader_atomic_float2) { device_chain.unlink(); } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 830b1d5c2..67dcc183a 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -109,6 +109,16 @@ public: return depth_clip_control; } + /// Returns true when VK_EXT_depth_clip_enable is supported + bool IsDepthClipEnableSupported() const { + return depth_clip_enable; + } + + /// Returns true when VK_EXT_depth_clamp_control is supported + bool IsDepthClampControlSupported() const { + return depth_clamp_control; + } + /// Returns true when VK_EXT_depth_range_unrestricted is supported bool IsDepthRangeUnrestrictedSupported() const { return depth_range_unrestricted; @@ -150,6 +160,11 @@ public: return legacy_vertex_attributes; } + /// Returns true when VK_EXT_provoking_vertex is supported. + bool IsProvokingVertexSupported() const { + return provoking_vertex; + } + /// Returns true when VK_AMD_shader_image_load_store_lod is supported. bool IsImageLoadStoreLodSupported() const { return image_load_store_lod; @@ -351,6 +366,11 @@ public: return driver_id != vk::DriverId::eMoltenvk; } + /// Returns true if logic ops are supported by the device. + bool IsLogicOpSupported() const { + return features.logicOp; + } + /// Determines if a format is supported for a set of feature flags. 
[[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const; @@ -399,12 +419,15 @@ private: bool custom_border_color{}; bool fragment_shader_barycentric{}; bool depth_clip_control{}; + bool depth_clip_enable{}; + bool depth_clamp_control{}; bool depth_range_unrestricted{}; bool dynamic_state_3{}; bool vertex_input_dynamic_state{}; bool robustness2{}; bool list_restart{}; bool legacy_vertex_attributes{}; + bool provoking_vertex{}; bool shader_stencil_export{}; bool image_load_store_lod{}; bool amd_gcn_shader{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8d12b74f3..d9e01091e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -285,26 +285,21 @@ bool PipelineCache::RefreshGraphicsKey() { auto& regs = liverpool->regs; auto& key = graphics_key; - const auto depth_format = instance.GetSupportedFormat( - LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format, - regs.depth_buffer.stencil_info.format), - vk::FormatFeatureFlagBits2::eDepthStencilAttachment); - if (regs.depth_buffer.DepthValid()) { - key.depth_format = depth_format; - } else { - key.depth_format = vk::Format::eUndefined; - } - if (regs.depth_buffer.StencilValid()) { - key.stencil_format = depth_format; - } else { - key.stencil_format = vk::Format::eUndefined; - } - + key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value() + : Liverpool::DepthBuffer::ZFormat::Invalid; + key.stencil_format = regs.depth_buffer.StencilValid() + ? regs.depth_buffer.stencil_info.format.Value() + : Liverpool::DepthBuffer::StencilFormat::Invalid; + key.depth_clip_enable = regs.clipper_control.ZclipEnable(); + key.clip_space = regs.clipper_control.clip_space; + key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); - key.clip_space = regs.clipper_control.clip_space; + key.logic_op = regs.color_control.rop3; key.num_samples = regs.NumSamples(); + RefreshDepthClampRange(); + const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; @@ -491,7 +486,63 @@ bool PipelineCache::RefreshGraphicsKey() { } return true; -} // namespace Vulkan +} + +void PipelineCache::RefreshDepthClampRange() { + auto& regs = liverpool->regs; + auto& key = graphics_key; + + key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp; + if (key.z_format == Liverpool::DepthBuffer::ZFormat::Invalid || !key.depth_clamp_enable) { + return; + } + + bool depth_clamp_can_use_viewport_range = true; + bool depth_clamp_is_same_on_all_viewports = true; + float zmin = std::numeric_limits::max(); + float zmax = std::numeric_limits::max(); + const auto& vp_ctl = regs.viewport_control; + for (u32 i = 0; i < Liverpool::NumViewports; i++) { + const auto& vp = regs.viewports[i]; + const auto& vp_d = regs.viewport_depths[i]; + if (vp.xscale == 0) { + continue; + } + const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f; + const auto zscale = vp_ctl.zscale_enable ? 
vp.zscale : 1.f; + + float min_depth; + float max_depth; + if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) { + min_depth = zoffset - zscale; + max_depth = zoffset + zscale; + } else { + min_depth = zoffset; + max_depth = zoffset + zscale; + } + if (zmin == std::numeric_limits::max()) { + zmin = vp_d.zmin; + zmax = vp_d.zmax; + } + depth_clamp_is_same_on_all_viewports &= (zmin == vp_d.zmin && zmax == vp_d.zmax); + depth_clamp_can_use_viewport_range &= (min_depth == vp_d.zmin && max_depth == vp_d.zmax); + } + + if (zmin == std::numeric_limits::max()) { + return; + } + + if (!depth_clamp_can_use_viewport_range && !depth_clamp_is_same_on_all_viewports) { + LOG_ERROR(Render_Vulkan, + "Viewport depth clamping configuration cannot be accurately emulated"); + } + + key.depth_clamp_user_defined_range = !depth_clamp_can_use_viewport_range; + if (key.depth_clamp_user_defined_range) { + key.min_depth_clamp = zmin; + key.max_depth_clamp = zmax; + } +} bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ba3407b48..405275439 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -76,6 +76,8 @@ private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); + void RefreshDepthClampRange(); + void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2a645f338..b6130e873 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1014,9 +1014,10 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const { UpdateViewportScissorState(); UpdateDepthStencilState(); UpdatePrimitiveState(); + UpdateRasterizationState(); auto& dynamic_state = scheduler.GetDynamicState(); - dynamic_state.SetBlendConstants(&liverpool->regs.blend_constants.red); + dynamic_state.SetBlendConstants(liverpool->regs.blend_constants); dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks()); // Commit new dynamic state to the command buffer. @@ -1086,12 +1087,6 @@ void Rasterizer::UpdateViewportScissorState() const { viewport.maxDepth = zoffset + zscale; } - if (!regs.depth_render_override.disable_viewport_clamp) { - // Apply depth clamp. - viewport.minDepth = std::max(viewport.minDepth, vp_d.zmin); - viewport.maxDepth = std::min(viewport.maxDepth, vp_d.zmax); - } - if (!instance.IsDepthRangeUnrestrictedSupported()) { // Unrestricted depth range not supported by device. Restrict to valid range. 
viewport.minDepth = std::max(viewport.minDepth, 0.f); @@ -1231,10 +1226,17 @@ void Rasterizer::UpdatePrimitiveState() const { const auto front_face = LiverpoolToVK::FrontFace(regs.polygon_control.front_face); dynamic_state.SetPrimitiveRestartEnabled(prim_restart); + dynamic_state.SetRasterizerDiscardEnabled(regs.clipper_control.dx_rasterization_kill); dynamic_state.SetCullMode(cull_mode); dynamic_state.SetFrontFace(front_face); } +void Rasterizer::UpdateRasterizationState() const { + const auto& regs = liverpool->regs; + auto& dynamic_state = scheduler.GetDynamicState(); + dynamic_state.SetLineWidth(regs.line_control.Width()); +} + void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) { if ((from_guest && !Config::getVkGuestMarkersEnabled()) || (!from_guest && !Config::getVkHostMarkersEnabled())) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1e1680258..79e7722b8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -94,6 +94,7 @@ private: void UpdateViewportScissorState() const; void UpdateDepthStencilState() const; void UpdatePrimitiveState() const; + void UpdateRasterizationState() const; bool FilterDraw(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index ac645c9ce..7c3429297 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -308,6 +308,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable); } } + if (dirty_state.rasterizer_discard_enable) { + dirty_state.rasterizer_discard_enable = false; + cmdbuf.setRasterizerDiscardEnable(rasterizer_discard_enable); + } if (dirty_state.cull_mode) { dirty_state.cull_mode = false; cmdbuf.setCullMode(cull_mode); @@ -318,7 +322,7 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd } if (dirty_state.blend_constants) { dirty_state.blend_constants = false; - cmdbuf.setBlendConstants(blend_constants); + cmdbuf.setBlendConstants(blend_constants.data()); } if (dirty_state.color_write_masks) { dirty_state.color_write_masks = false; @@ -326,6 +330,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd cmdbuf.setColorWriteMaskEXT(0, color_write_masks); } } + if (dirty_state.line_width) { + dirty_state.line_width = false; + cmdbuf.setLineWidth(line_width); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b5678edbc..3616d8478 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -96,11 +96,13 @@ struct DynamicState { bool stencil_back_compare_mask : 1; bool primitive_restart_enable : 1; + bool rasterizer_discard_enable : 1; bool cull_mode : 1; bool front_face : 1; bool blend_constants : 1; bool color_write_masks : 1; + bool line_width : 1; } dirty_state{}; Viewports viewports{}; @@ -130,11 +132,13 @@ struct DynamicState { u32 stencil_back_compare_mask{}; bool primitive_restart_enable{}; + bool rasterizer_discard_enable{}; vk::CullModeFlags cull_mode{}; vk::FrontFace front_face{}; - float blend_constants[4]{}; + std::array blend_constants{}; ColorWriteMasks color_write_masks{}; + float line_width{}; /// Commits the dynamic state to the provided command buffer. 
void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf); @@ -283,19 +287,33 @@ struct DynamicState { } } - void SetBlendConstants(const float blend_constants_[4]) { - if (!std::equal(blend_constants, std::end(blend_constants), blend_constants_)) { - std::memcpy(blend_constants, blend_constants_, sizeof(blend_constants)); + void SetBlendConstants(const std::array blend_constants_) { + if (blend_constants != blend_constants_) { + blend_constants = blend_constants_; dirty_state.blend_constants = true; } } + void SetRasterizerDiscardEnabled(const bool enabled) { + if (rasterizer_discard_enable != enabled) { + rasterizer_discard_enable = enabled; + dirty_state.rasterizer_discard_enable = true; + } + } + void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) { if (!std::ranges::equal(color_write_masks, color_write_masks_)) { color_write_masks = color_write_masks_; dirty_state.color_write_masks = true; } } + + void SetLineWidth(const float width) { + if (line_width != width) { + line_width = width; + dirty_state.line_width = true; + } + } }; class Scheduler { From b68ca43166be4bcb985f442db465b3fcc7d5e4ab Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:44:13 +0200 Subject: [PATCH 03/12] Implement sceKernelGetSystemSwVersion (#3243) * Implement sceKernelGetSystemSwVersion * Set the reported firmware version to that of the game executable --- src/core/libraries/kernel/kernel.cpp | 15 +++++++++++++++ src/core/libraries/kernel/kernel.h | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index 61d2e2f2b..a4d3accac 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "common/polyfill_thread.h" #include "common/thread.h" @@ -243,6 +244,19 @@ s32 PS4_SYSV_ABI sceKernelSetGPO() { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceKernelGetSystemSwVersion(SwVersionStruct* ret) { + if (ret == nullptr) { + return ORBIS_OK; // but why? + } + ASSERT(ret->struct_size == 40); + u32 fake_fw = Common::ElfInfo::Instance().RawFirmwareVer(); + ret->hex_representation = fake_fw; + std::snprintf(ret->text_representation, 28, "%2x.%03x.%03x", fake_fw >> 0x18, + fake_fw >> 0xc & 0xfff, fake_fw & 0xfff); // why %2x? 
+ LOG_INFO(Lib_Kernel, "called, returned sw version: {}", ret->text_representation); + return ORBIS_OK; +} + void RegisterKernel(Core::Loader::SymbolsResolver* sym) { service_thread = std::jthread{KernelServiceThread}; @@ -258,6 +272,7 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) { Libraries::Kernel::RegisterDebug(sym); LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard); + LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", 1, 1, sceKernelGetSystemSwVersion); LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", 1, 1, kernel_ioctl); LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord); LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask); diff --git a/src/core/libraries/kernel/kernel.h b/src/core/libraries/kernel/kernel.h index 0529c06d5..018759e14 100644 --- a/src/core/libraries/kernel/kernel.h +++ b/src/core/libraries/kernel/kernel.h @@ -35,6 +35,12 @@ struct OrbisWrapperImpl { s32* PS4_SYSV_ABI __Error(); +struct SwVersionStruct { + u64 struct_size; + char text_representation[0x1c]; + u32 hex_representation; +}; + void RegisterKernel(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Kernel From 97daee836a05c18da655bf7f19d4b39bb024bfb8 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Tue, 15 Jul 2025 06:11:56 -0500 Subject: [PATCH 04/12] Core: Fix read-only file unmaps on Windows (#3246) * Fix read-only file unmaps Fixes Genshin Impact (CUSA23681) * Slight cleanup Don't need `post_merge_it` anymore. --- src/core/memory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 3d9bf58a7..12099ea73 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -537,6 +537,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size; const bool has_backing = type == VMAType::Direct || type == VMAType::File; const auto prot = vma_base.prot; + const bool readonly_file = prot == MemoryProt::CpuRead && type == VMAType::File; if (type == VMAType::Free) { return adjusted_size; @@ -554,9 +555,8 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma vma.phys_base = 0; vma.disallow_merge = false; vma.name = ""; - const auto post_merge_it = MergeAdjacent(vma_map, new_it); - auto& post_merge_vma = post_merge_it->second; - bool readonly_file = post_merge_vma.prot == MemoryProt::CpuRead && type == VMAType::File; + MergeAdjacent(vma_map, new_it); + if (type != VMAType::Reserved && type != VMAType::PoolReserved) { // If this mapping has GPU access, unmap from GPU. if (IsValidGpuMapping(virtual_addr, size)) { From bf623d4f855e4d04a8596ac43370045caded7546 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Tue, 15 Jul 2025 07:48:05 -0500 Subject: [PATCH 05/12] Libraries: Implement sceAudio3dTerminate (#3247) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * sceAudio3dTerminate My First Gran Turismo® (CUSA49696) uses this while initializing it's audio system. Without it, the game spams errored sceAudio3dInitialize calls. * Properly close AudioOut handle Based on library decompilation. 
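For reference, a minimal sketch of the teardown order this adds (identifiers taken from the diff below; the idea that passing nullptr to sceAudioOutOutput lets queued output drain before the handle is closed is an inference from the decompiled library, not verified here):

    if (!state) {
        return ORBIS_AUDIO3D_ERROR_NOT_READY; // library was never initialized
    }
    AudioOut::sceAudioOutOutput(state->audio_out_handle, nullptr); // flush pending output
    AudioOut::sceAudioOutClose(state->audio_out_handle);           // release the AudioOut handle
    state.release();                                               // drop the Audio3d state
    return ORBIS_OK;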
--- src/core/libraries/audio3d/audio3d.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/core/libraries/audio3d/audio3d.cpp b/src/core/libraries/audio3d/audio3d.cpp index 646c28949..81befc5bf 100644 --- a/src/core/libraries/audio3d/audio3d.cpp +++ b/src/core/libraries/audio3d/audio3d.cpp @@ -526,7 +526,14 @@ s32 PS4_SYSV_ABI sceAudio3dStrError() { } s32 PS4_SYSV_ABI sceAudio3dTerminate() { - LOG_ERROR(Lib_Audio3d, "(STUBBED) called"); + LOG_INFO(Lib_Audio3d, "called"); + if (!state) { + return ORBIS_AUDIO3D_ERROR_NOT_READY; + } + + AudioOut::sceAudioOutOutput(state->audio_out_handle, nullptr); + AudioOut::sceAudioOutClose(state->audio_out_handle); + state.release(); return ORBIS_OK; } From 87f6cce7b1ed286b35dc4312adfc5d584a7b4b4f Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Tue, 15 Jul 2025 18:36:13 +0300 Subject: [PATCH 06/12] vk_instance: Remove usage of depth clamp control (#3248) --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 27 ++------- .../renderer_vulkan/vk_graphics_pipeline.h | 3 - .../renderer_vulkan/vk_instance.cpp | 7 --- src/video_core/renderer_vulkan/vk_instance.h | 6 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 59 +------------------ .../renderer_vulkan/vk_pipeline_cache.h | 2 - 6 files changed, 6 insertions(+), 98 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 10e5bed5f..3c8332c10 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -140,31 +140,14 @@ GraphicsPipeline::GraphicsPipeline( .sampleShadingEnable = false, }; - const vk::DepthClampRangeEXT depth_clamp_range = { - .minDepthClamp = key.min_depth_clamp, - .maxDepthClamp = key.max_depth_clamp, + const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { + .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, }; - vk::StructureChain viewport_chain = { - vk::PipelineViewportStateCreateInfo{}, - vk::PipelineViewportDepthClipControlCreateInfoEXT{ - .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, - }, - vk::PipelineViewportDepthClampControlCreateInfoEXT{ - .depthClampMode = key.depth_clamp_user_defined_range - ? vk::DepthClampModeEXT::eUserDefinedRange - : vk::DepthClampModeEXT::eViewportRange, - .pDepthClampRange = &depth_clamp_range, - }, + const vk::PipelineViewportStateCreateInfo viewport_info = { + .pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr, }; - if (!instance.IsDepthClampControlSupported()) { - viewport_chain.unlink(); - } - if (!instance.IsDepthClipControlSupported()) { - viewport_chain.unlink(); - } - boost::container::static_vector dynamic_states = { vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount, vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable, @@ -339,7 +322,7 @@ GraphicsPipeline::GraphicsPipeline( .pVertexInputState = !instance.IsVertexInputDynamicState() ? 
&vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, .pTessellationState = &tessellation_state, - .pViewportState = &viewport_chain.get(), + .pViewportState = &viewport_info, .pRasterizationState = &raster_chain.get(), .pMultisampleState = &multisampling, .pColorBlendState = &color_blending, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1ecfa6b42..75b8c8c73 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -48,9 +48,6 @@ struct GraphicsPipelineKey { Liverpool::DepthBuffer::ZFormat z_format : 2; Liverpool::DepthBuffer::StencilFormat stencil_format : 1; u32 depth_clamp_enable : 1; - u32 depth_clamp_user_defined_range : 1; - float min_depth_clamp; - float max_depth_clamp; }; struct { AmdGpu::PrimitiveType prim_type : 5; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 3a461b321..c0b138fad 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,7 +271,6 @@ bool Instance::CreateDevice() { custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME); - depth_clamp_control = add_extension(VK_EXT_DEPTH_CLAMP_CONTROL_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); @@ -426,9 +425,6 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceDepthClipEnableFeaturesEXT{ .depthClipEnable = true, }, - vk::PhysicalDeviceDepthClampControlFeaturesEXT{ - .depthClampControl = true, - }, vk::PhysicalDeviceRobustness2FeaturesEXT{ .robustBufferAccess2 = robustness2_features.robustBufferAccess2, .robustImageAccess2 = robustness2_features.robustImageAccess2, @@ -504,9 +500,6 @@ bool Instance::CreateDevice() { if (!depth_clip_enable) { device_chain.unlink(); } - if (!depth_clamp_control) { - device_chain.unlink(); - } if (!robustness2) { device_chain.unlink(); } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 67dcc183a..d96abfabe 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -114,11 +114,6 @@ public: return depth_clip_enable; } - /// Returns true when VK_EXT_depth_clamp_control is supported - bool IsDepthClampControlSupported() const { - return depth_clamp_control; - } - /// Returns true when VK_EXT_depth_range_unrestricted is supported bool IsDepthRangeUnrestrictedSupported() const { return depth_range_unrestricted; @@ -420,7 +415,6 @@ private: bool fragment_shader_barycentric{}; bool depth_clip_control{}; bool depth_clip_enable{}; - bool depth_clamp_control{}; bool depth_range_unrestricted{}; bool dynamic_state_3{}; bool vertex_input_dynamic_state{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d9e01091e..31ede7936 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -290,6 +290,7 @@ bool PipelineCache::RefreshGraphicsKey() { 
key.stencil_format = regs.depth_buffer.StencilValid() ? regs.depth_buffer.stencil_info.format.Value() : Liverpool::DepthBuffer::StencilFormat::Invalid; + key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp; key.depth_clip_enable = regs.clipper_control.ZclipEnable(); key.clip_space = regs.clipper_control.clip_space; key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last; @@ -298,8 +299,6 @@ bool PipelineCache::RefreshGraphicsKey() { key.logic_op = regs.color_control.rop3; key.num_samples = regs.NumSamples(); - RefreshDepthClampRange(); - const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; @@ -488,62 +487,6 @@ bool PipelineCache::RefreshGraphicsKey() { return true; } -void PipelineCache::RefreshDepthClampRange() { - auto& regs = liverpool->regs; - auto& key = graphics_key; - - key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp; - if (key.z_format == Liverpool::DepthBuffer::ZFormat::Invalid || !key.depth_clamp_enable) { - return; - } - - bool depth_clamp_can_use_viewport_range = true; - bool depth_clamp_is_same_on_all_viewports = true; - float zmin = std::numeric_limits::max(); - float zmax = std::numeric_limits::max(); - const auto& vp_ctl = regs.viewport_control; - for (u32 i = 0; i < Liverpool::NumViewports; i++) { - const auto& vp = regs.viewports[i]; - const auto& vp_d = regs.viewport_depths[i]; - if (vp.xscale == 0) { - continue; - } - const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f; - const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f; - - float min_depth; - float max_depth; - if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) { - min_depth = zoffset - zscale; - max_depth = zoffset + zscale; - } else { - min_depth = zoffset; - max_depth = zoffset + zscale; - } - if (zmin == std::numeric_limits::max()) { - zmin = vp_d.zmin; - zmax = vp_d.zmax; - } - depth_clamp_is_same_on_all_viewports &= (zmin == vp_d.zmin && zmax == vp_d.zmax); - depth_clamp_can_use_viewport_range &= (min_depth == vp_d.zmin && max_depth == vp_d.zmax); - } - - if (zmin == std::numeric_limits::max()) { - return; - } - - if (!depth_clamp_can_use_viewport_range && !depth_clamp_is_same_on_all_viewports) { - LOG_ERROR(Render_Vulkan, - "Viewport depth clamping configuration cannot be accurately emulated"); - } - - key.depth_clamp_user_defined_range = !depth_clamp_can_use_viewport_range; - if (key.depth_clamp_user_defined_range) { - key.min_depth_clamp = zmin; - key.max_depth_clamp = zmax; - } -} - bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto& cs_pgm = liverpool->GetCsRegs(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 405275439..ba3407b48 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -76,8 +76,6 @@ private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); - void RefreshDepthClampRange(); - void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, From 4407ebdd9be6710f9852b6d85be4daae2b524f95 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Tue, 15 Jul 2025 18:49:12 +0300 Subject: [PATCH 07/12] shader_recompiler: Implement guest barycentrics (#3245) * shader_recompiler: Implement guest barycentrics * Review comments and some cleanup --- 
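Note for reviewers (illustrative only, restating the V_INTERP comments from the diff): when the translator has to interpolate manually from per-vertex parameters (exposed via SPV_AMD_shader_explicit_vertex_parameter or SPV_KHR_fragment_shader_barycentric), a guest attribute is rebuilt from the three vertex values p0/p1/p2 and the barycentric weights i/j held in VGPRs, using the same two-FMA form the translator emits:

    // V_INTERP_P1_F32: VDST = P10 * VSRC + P0
    const IR::F32 tmp = ir.FPFma(ir.FPSub(p1, p0), i, p0);
    // V_INTERP_P2_F32: VDST = P20 * VSRC + VDST
    const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, tmp);

Vertex 0 implicitly carries the remaining weight 1 - i - j.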
externals/sirit | 2 +- .../backend/spirv/emit_spirv.cpp | 14 +- .../spirv/emit_spirv_context_get_set.cpp | 112 ++++++------- .../backend/spirv/emit_spirv_special.cpp | 2 +- .../backend/spirv/spirv_emit_context.cpp | 155 +++++++++--------- .../backend/spirv/spirv_emit_context.h | 15 +- .../frontend/translate/translate.cpp | 63 +++---- .../frontend/translate/translate.h | 5 +- .../translate/vector_interpolation.cpp | 75 ++++++++- src/shader_recompiler/info.h | 19 ++- src/shader_recompiler/ir/attribute.cpp | 14 ++ src/shader_recompiler/ir/attribute.h | 36 ++-- src/shader_recompiler/profile.h | 9 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_instance.cpp | 7 +- src/video_core/renderer_vulkan/vk_instance.h | 6 + .../renderer_vulkan/vk_pipeline_cache.cpp | 6 + 17 files changed, 314 insertions(+), 229 deletions(-) diff --git a/externals/sirit b/externals/sirit index b4eccb336..282083a59 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e +Subproject commit 282083a595dcca86814dedab2f2b0363ef38f1ec diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c4c310586..baf9ced25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -293,9 +293,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } - if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { - ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); - ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); + if (info.stage == Stage::Fragment) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + ctx.AddExtension("SPV_AMD_shader_explicit_vertex_parameter"); + } else if (profile.supports_fragment_shader_barycentric) { + ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); + ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); + } + if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample) || + info.loads.GetAny(IR::Attribute::BaryCoordNoPerspSample)) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } } if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { ctx.AddCapability(spv::Capability::Tessellation); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 40f8d307c..ead2a2825 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -45,7 +45,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num); } default: - UNREACHABLE(); + UNREACHABLE_MSG("Vertex output {}", u32(output)); } } @@ -88,7 +88,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { case IR::Attribute::Depth: return ctx.frag_depth; default: - throw NotImplementedException("Write attribute {}", attr); + UNREACHABLE_MSG("Write attribute {}", attr); } } @@ -111,7 +111,7 @@ std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr case IR::Attribute::Depth: return {ctx.F32[1], false}; default: - throw NotImplementedException("Write attribute {}", attr); + UNREACHABLE_MSG("Write attribute {}", attr); } } } // Anonymous 
namespace @@ -159,81 +159,61 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { return result; } -static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (IR::IsPosition(attr)) { - ASSERT(attr == IR::Attribute::Position0); - const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - - if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - UNREACHABLE(); -} - Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.l_stage == LogicalStage::Geometry) { - return EmitGetAttributeForGeometry(ctx, attr, comp, index); - } else if (ctx.info.l_stage == LogicalStage::TessellationControl || - ctx.info.l_stage == LogicalStage::TessellationEval) { - if (IR::IsTessCoord(attr)) { - const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; - const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - const auto pointer{ - ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; - return ctx.OpLoad(ctx.F32[1], pointer); - } - UNREACHABLE(); - } - if (IR::IsParam(attr)) { const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(param_index)}; - Id result; - if (param.is_loaded) { - // Attribute is either default or manually interpolated. The id points to an already - // loaded vector. - result = ctx.OpCompositeExtract(param.component_type, param.id, comp); - } else if (param.num_components > 1) { - // Attribute is a vector and we need to access a specific component. - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - result = ctx.OpLoad(param.component_type, pointer); - } else { - // Attribute is a single float or interger, simply load it. - result = ctx.OpLoad(param.component_type, param.id); - } - if (param.is_integer) { - result = ctx.OpBitcast(ctx.F32[1], result); - } - return result; + const Id value = [&] { + if (param.is_array) { + ASSERT(param.num_components > 1); + if (param.is_loaded) { + return ctx.OpCompositeExtract(param.component_type, param.id_array[index], + comp); + } else { + return ctx.OpLoad(param.component_type, + ctx.OpAccessChain(param.pointer_type, param.id, + ctx.ConstU32(index), ctx.ConstU32(comp))); + } + } else { + ASSERT(!param.is_loaded); + if (param.num_components > 1) { + return ctx.OpLoad( + param.component_type, + ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))); + } else { + return ctx.OpLoad(param.component_type, param.id); + } + } + }(); + return param.is_integer ? 
ctx.OpBitcast(ctx.F32[1], value) : value; + } + if (IR::IsBarycentricCoord(attr) && ctx.profile.supports_fragment_shader_barycentric) { + ++comp; } - switch (attr) { - case IR::Attribute::FragCoord: { - const Id coord = ctx.OpLoad( - ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); - if (comp == 3) { - return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord); - } - return coord; - } + case IR::Attribute::Position0: + ASSERT(ctx.l_stage == LogicalStage::Geometry); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.gl_in, ctx.ConstU32(index), + ctx.ConstU32(0U), ctx.ConstU32(comp))); + case IR::Attribute::FragCoord: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); case IR::Attribute::TessellationEvaluationPointU: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); case IR::Attribute::TessellationEvaluationPointV: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); + case IR::Attribute::BaryCoordSmooth: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth, + ctx.ConstU32(comp))); + case IR::Attribute::BaryCoordSmoothSample: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth_sample, + ctx.ConstU32(comp))); + case IR::Attribute::BaryCoordNoPersp: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_nopersp, + ctx.ConstU32(comp))); default: UNREACHABLE_MSG("Read attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fe7bd3356..70a44cbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV { void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::Fragment) { - ctx.DefineInterpolatedAttribs(); + ctx.DefineAmdPerVertexAttribs(); } if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) { ctx.DefineWorkgroupIndex(); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index e16bba755..f373808d9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -196,14 +196,15 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { } EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, - u32 num_components, bool output) { + u32 num_components, bool output, + bool loaded, bool array) { switch (GetNumberClass(fmt)) { case AmdGpu::NumberClass::Float: - return {id, output ? output_f32 : input_f32, F32[1], num_components, false}; + return {id, output ? output_f32 : input_f32, F32[1], num_components, false, loaded, array}; case AmdGpu::NumberClass::Uint: - return {id, output ? output_u32 : input_u32, U32[1], num_components, true}; + return {id, output ? output_u32 : input_u32, U32[1], num_components, true, loaded, array}; case AmdGpu::NumberClass::Sint: - return {id, output ? output_s32 : input_s32, S32[1], num_components, true}; + return {id, output ? 
output_s32 : input_s32, S32[1], num_components, true, loaded, array}; default: break; } @@ -298,33 +299,24 @@ void EmitContext::DefineBufferProperties() { } } -void EmitContext::DefineInterpolatedAttribs() { - if (!profile.needs_manual_interpolation) { +void EmitContext::DefineAmdPerVertexAttribs() { + if (!profile.supports_amd_shader_explicit_vertex_parameter) { return; } - // Iterate all input attributes, load them and manually interpolate. for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; - auto& params = input_params[i]; - if (input.is_flat || params.is_loaded) { + if (input.IsDefault() || info.fs_interpolation[i].primary != Qualifier::PerVertex) { continue; } - const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)}; - const Id p0{OpCompositeExtract(F32[4], p_array, 0U)}; - const Id p1{OpCompositeExtract(F32[4], p_array, 1U)}; - const Id p2{OpCompositeExtract(F32[4], p_array, 2U)}; - const Id p10{OpFSub(F32[4], p1, p0)}; - const Id p20{OpFSub(F32[4], p2, p0)}; - const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i]) - ? bary_coord_linear_id - : bary_coord_persp_id)}; - const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)}; - const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)}; - const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)}; - const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)}; - params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z)); - Name(params.id, fmt::format("fs_in_attr{}", i)); - params.is_loaded = true; + auto& param = input_params[i]; + const Id pointer = param.id; + param.id_array[0] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(0U)); + param.id_array[1] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(1U)); + param.id_array[2] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(2U)); + param.is_loaded = true; } } @@ -342,21 +334,6 @@ void EmitContext::DefineWorkgroupIndex() { Name(workgroup_index_id, "workgroup_index"); } -Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { - switch (default_value) { - case 0: - return ctx.ConstF32(0.f, 0.f, 0.f, 0.f); - case 1: - return ctx.ConstF32(0.f, 0.f, 0.f, 1.f); - case 2: - return ctx.ConstF32(1.f, 1.f, 1.f, 0.f); - case 3: - return ctx.ConstF32(1.f, 1.f, 1.f, 1.f); - default: - UNREACHABLE(); - } -} - void EmitContext::DefineInputs() { if (info.uses_lane_id) { subgroup_local_invocation_id = DefineVariable( @@ -398,49 +375,71 @@ void EmitContext::DefineInputs() { front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); } - if (profile.needs_manual_interpolation) { - if (info.has_perspective_interp) { - bary_coord_persp_id = + if (info.loads.GetAny(IR::Attribute::BaryCoordSmooth)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_smooth = DefineVariable(F32[2], spv::BuiltIn::BaryCoordSmoothAMD, + spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_smooth = DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); + } else { + bary_coord_smooth = ConstF32(0.f, 0.f); } - if (info.has_linear_interp) { - bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR, - spv::StorageClass::Input); + } + if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_smooth_sample = 
DefineVariable( + F32[2], spv::BuiltIn::BaryCoordSmoothSampleAMD, spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_smooth_sample = + DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); + // Decorate(bary_coord_smooth_sample, spv::Decoration::Sample); + } else { + bary_coord_smooth_sample = ConstF32(0.f, 0.f); + } + } + if (info.loads.GetAny(IR::Attribute::BaryCoordNoPersp)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_nopersp = DefineVariable(F32[2], spv::BuiltIn::BaryCoordNoPerspAMD, + spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_nopersp = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR, + spv::StorageClass::Input); + } else { + bary_coord_nopersp = ConstF32(0.f, 0.f); } } for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; if (input.IsDefault()) { - input_params[i] = { - .id = MakeDefaultValue(*this, input.default_value), - .pointer_type = input_f32, - .component_type = F32[1], - .num_components = 4, - .is_integer = false, - .is_loaded = true, - }; continue; } - const IR::Attribute param{IR::Attribute::Param0 + i}; + const IR::Attribute param = IR::Attribute::Param0 + i; const u32 num_components = info.loads.NumComponents(param); - const Id type{F32[num_components]}; - Id attr_id{}; - if (profile.needs_manual_interpolation && !input.is_flat) { - attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index); - Decorate(attr_id, spv::Decoration::PerVertexKHR); - Name(attr_id, fmt::format("fs_in_attr{}_p", i)); - } else { - attr_id = DefineInput(type, input.param_index); - Name(attr_id, fmt::format("fs_in_attr{}", i)); - - if (input.is_flat) { - Decorate(attr_id, spv::Decoration::Flat); - } else if (IsLinear(info.interp_qualifiers[i])) { - Decorate(attr_id, spv::Decoration::NoPerspective); + const auto [primary, auxiliary] = info.fs_interpolation[i]; + const Id type = F32[num_components]; + const Id attr_id = [&] { + if (primary == Qualifier::PerVertex && + profile.supports_fragment_shader_barycentric) { + return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index), + fmt::format("fs_in_attr{}_p", i)); } + return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i)); + }(); + if (primary == Qualifier::PerVertex) { + Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter + ? spv::Decoration::ExplicitInterpAMD + : spv::Decoration::PerVertexKHR); + } else if (primary != Qualifier::Smooth) { + Decorate(attr_id, primary == Qualifier::Flat ? spv::Decoration::Flat + : spv::Decoration::NoPerspective); } - input_params[i] = - GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); + if (auxiliary != Qualifier::None) { + Decorate(attr_id, auxiliary == Qualifier::Centroid ? 
spv::Decoration::Centroid + : spv::Decoration::Sample); + } + input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, + false, false, primary == Qualifier::PerVertex); } break; case LogicalStage::Compute: @@ -461,17 +460,16 @@ void EmitContext::DefineInputs() { case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = - Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), - "gl_PerVertex"); + Name(TypeStruct(F32[4], F32[1], TypeArray(F32[1], ConstU32(1u))), "gl_PerVertex"); MemberName(gl_per_vertex, 0, "gl_Position"); MemberName(gl_per_vertex, 1, "gl_PointSize"); MemberName(gl_per_vertex, 2, "gl_ClipDistance"); MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::Position)); + static_cast(spv::BuiltIn::Position)); MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::PointSize)); + static_cast(spv::BuiltIn::PointSize)); MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::ClipDistance)); + static_cast(spv::BuiltIn::ClipDistance)); Decorate(gl_per_vertex, spv::Decoration::Block); const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive); const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in)); @@ -483,7 +481,8 @@ void EmitContext::DefineInputs() { const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; Name(id, fmt::format("gs_in_attr{}", param_id)); - input_params[param_id] = {id, input_f32, F32[1], 4}; + input_params[param_id] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, false, false, true); } break; } @@ -665,7 +664,7 @@ void EmitContext::DefineOutputs() { for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); - output_params[attr_id] = {id, output_f32, F32[1], 4u}; + output_params[attr_id] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, true); } break; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 186925706..f57dbebd8 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -45,7 +45,7 @@ public: Id Def(const IR::Value& value); void DefineBufferProperties(); - void DefineInterpolatedAttribs(); + void DefineAmdPerVertexAttribs(); void DefineWorkgroupIndex(); [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, @@ -279,8 +279,9 @@ public: Id shared_memory_u32_type{}; Id shared_memory_u64_type{}; - Id bary_coord_persp_id{}; - Id bary_coord_linear_id{}; + Id bary_coord_smooth{}; + Id bary_coord_smooth_sample{}; + Id bary_coord_nopersp{}; struct TextureDefinition { const VectorIds* data_types; @@ -355,12 +356,16 @@ public: Id sampler_pointer_type{}; struct SpirvAttribute { - Id id; + union { + Id id; + std::array id_array; + }; Id pointer_type; Id component_type; u32 num_components; bool is_integer{}; bool is_loaded{}; + bool is_array{}; }; Id input_attr_array; Id output_attr_array; @@ -390,7 +395,7 @@ private: void DefineFunctions(); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components, - bool output); + bool output, bool loaded = false, bool array = false); BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 
elem_shift, BufferType buffer_type, Id data_type); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 310ac9156..578c1f96a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -21,50 +21,39 @@ namespace Shader::Gcn { -Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) - : info{info_}, runtime_info{runtime_info_}, profile{profile_}, - next_vgpr_num{runtime_info.num_allocated_vgprs} { - if (info.l_stage == LogicalStage::Fragment) { - dst_frag_vreg = GatherInterpQualifiers(); +static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) { + if (runtime_info.stage != Stage::Fragment) { + return IR::VectorReg::V0; } -} - -IR::VectorReg Translator::GatherInterpQualifiers() { u32 dst_vreg{}; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J } if (runtime_info.fs_info.addr_flags.persp_center_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J } if (runtime_info.fs_info.addr_flags.persp_centroid_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) { - ++dst_vreg; // I/W - ++dst_vreg; // J/W - ++dst_vreg; // 1/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W } if (runtime_info.fs_info.addr_flags.linear_sample_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J } if (runtime_info.fs_info.addr_flags.linear_center_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J } if (runtime_info.fs_info.addr_flags.linear_centroid_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I + set_attribute(dst_vreg++, 
IR::Attribute::BaryCoordNoPerspCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) { ++dst_vreg; @@ -72,6 +61,14 @@ IR::VectorReg Translator::GatherInterpQualifiers() { return IR::VectorReg(dst_vreg); } +Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) + : info{info_}, runtime_info{runtime_info_}, profile{profile_}, + next_vgpr_num{runtime_info.num_allocated_vgprs} { + IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32) { + vgpr_to_interp[vreg] = attrib; + }); +} + void Translator::EmitPrologue(IR::Block* first_block) { ir = IR::IREmitter(*first_block, first_block->begin()); @@ -127,7 +124,10 @@ void Translator::EmitPrologue(IR::Block* first_block) { } break; case LogicalStage::Fragment: - dst_vreg = dst_frag_vreg; + dst_vreg = + IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) { + ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp)); + }); if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { if (runtime_info.fs_info.en_flags.pos_x_float_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); @@ -151,7 +151,8 @@ void Translator::EmitPrologue(IR::Block* first_block) { } if (runtime_info.fs_info.addr_flags.pos_w_float_ena) { if (runtime_info.fs_info.en_flags.pos_w_float_ena) { - ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3)); + ir.SetVectorReg(dst_vreg++, + ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3))); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 4b5ff827b..a29bdc993 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -265,6 +265,7 @@ public: // Vector interpolation // VINTRP + void V_INTERP_P1_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_MOV_F32(const GcnInst& inst); @@ -323,7 +324,6 @@ private: void LogMissingOpcode(const GcnInst& inst); IR::VectorReg GetScratchVgpr(u32 offset); - IR::VectorReg GatherInterpQualifiers(); private: IR::IREmitter ir; @@ -332,8 +332,7 @@ private: const Profile& profile; u32 next_vgpr_num; std::unordered_map vgpr_map; - std::array vgpr_to_interp{}; - IR::VectorReg dst_frag_vreg{}; + std::array vgpr_to_interp{}; bool opcode_missing = false; }; diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 5a287dbe2..c32e80815 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -5,11 +5,32 @@ namespace Shader::Gcn { +using Interpolation = Info::Interpolation; + +static Interpolation GetInterpolation(IR::Attribute attribute) { + switch (attribute) { + case IR::Attribute::BaryCoordNoPersp: + return {Qualifier::NoPerspective, Qualifier::None}; + case IR::Attribute::BaryCoordNoPerspCentroid: + return {Qualifier::NoPerspective, Qualifier::Centroid}; + case IR::Attribute::BaryCoordNoPerspSample: + return {Qualifier::NoPerspective, Qualifier::Sample}; + case IR::Attribute::BaryCoordSmooth: + return {Qualifier::Smooth, Qualifier::None}; + case IR::Attribute::BaryCoordSmoothCentroid: + return {Qualifier::Smooth, Qualifier::Centroid}; + case IR::Attribute::BaryCoordSmoothSample: + return {Qualifier::Smooth, Qualifier::Sample}; 
+ default: + UNREACHABLE_MSG("Unhandled barycentric attribute {}", NameOf(attribute)); + } +} + void Translator::EmitVectorInterpolation(const GcnInst& inst) { switch (inst.opcode) { // VINTRP case Opcode::V_INTERP_P1_F32: - return; + return V_INTERP_P1_F32(inst); case Opcode::V_INTERP_P2_F32: return V_INTERP_P2_F32(inst); case Opcode::V_INTERP_MOV_F32: @@ -21,19 +42,57 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) { // VINTRP +void Translator::V_INTERP_P1_F32(const GcnInst& inst) { + if (!profile.needs_manual_interpolation) { + return; + } + // VDST = P10 * VSRC + P0 + const u32 attr_index = inst.control.vintrp.attr; + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0); + const IR::F32 p1 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 1); + const IR::F32 i = GetSrc(inst.src[0]); + const IR::F32 result = ir.FPFma(ir.FPSub(p1, p0), i, p0); + SetDst(inst.dst[0], result); +} + void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const u32 attr_index = inst.control.vintrp.attr; - const auto& attr = runtime_info.fs_info.inputs.at(attr_index); - info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code]; - const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; - SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const auto& attr = runtime_info.fs_info.inputs[attr_index]; + auto& interp = info.fs_interpolation[attr_index]; + ASSERT(!attr.IsDefault() && !attr.is_flat); + if (!profile.needs_manual_interpolation) { + interp = GetInterpolation(vgpr_to_interp[inst.src[0].code]); + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + return; + } + // VDST = P20 * VSRC + VDST + const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0); + const IR::F32 p2 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 2); + const IR::F32 j = GetSrc(inst.src[0]); + const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, GetSrc(inst.dst[0])); + interp.primary = Qualifier::PerVertex; + SetDst(inst.dst[0], result); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { const u32 attr_index = inst.control.vintrp.attr; - const auto& attr = runtime_info.fs_info.inputs.at(attr_index); - const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; - SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const auto& attr = runtime_info.fs_info.inputs[attr_index]; + auto& interp = info.fs_interpolation[attr_index]; + ASSERT(attr.is_flat); + if (profile.supports_amd_shader_explicit_vertex_parameter || + (profile.supports_fragment_shader_barycentric && + !profile.has_incomplete_fragment_shader_barycentric)) { + // VSRC 0=P10, 1=P20, 2=P0 + interp.primary = Qualifier::PerVertex; + SetDst(inst.dst[0], + ir.GetAttribute(attrib, inst.control.vintrp.chan, (inst.src[0].code + 1) % 3)); + } else { + interp.primary = Qualifier::Flat; + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + } } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 6e12c6816..bb5c88584 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + #pragma once #include @@ -135,6 +136,16 @@ struct 
PushData { static_assert(sizeof(PushData) <= 128, "PushData size is greater than minimum size guaranteed by Vulkan spec"); +enum class Qualifier : u8 { + None, + Smooth, + NoPerspective, + PerVertex, + Flat, + Centroid, + Sample, +}; + /** * Contains general information generated by the shader recompiler for an input program. */ @@ -194,7 +205,11 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; - std::array interp_qualifiers{}; + struct Interpolation { + Qualifier primary; + Qualifier auxiliary; + }; + std::array fs_interpolation{}; IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; s32 tess_consts_dword_offset = -1; @@ -209,8 +224,6 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; - bool has_perspective_interp{}; - bool has_linear_interp{}; bool uses_buffer_atomic_float_min_max{}; bool uses_image_atomic_float_min_max{}; bool uses_lane_id{}; diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index b2f11d141..094c34ee8 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -130,6 +130,20 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationIndex"; case Attribute::FragCoord: return "FragCoord"; + case Attribute::BaryCoordNoPersp: + return "BaryCoordNoPersp"; + case Attribute::BaryCoordNoPerspCentroid: + return "BaryCoordNoPerspCentroid"; + case Attribute::BaryCoordNoPerspSample: + return "BaryCoordNoPerspSample"; + case Attribute::BaryCoordSmooth: + return "BaryCoordSmooth"; + case Attribute::BaryCoordSmoothCentroid: + return "BaryCoordSmoothCentroid"; + case Attribute::BaryCoordSmoothSample: + return "BaryCoordSmoothSample"; + case Attribute::BaryCoordPullModel: + return "BaryCoordPullModel"; case Attribute::InvocationId: return "InvocationId"; case Attribute::PatchVertices: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index b6b1c8b59..00ec6c4b3 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -73,24 +73,21 @@ enum class Attribute : u64 { LocalInvocationId = 76, LocalInvocationIndex = 77, FragCoord = 78, - InvocationId = 81, // TCS id in output patch and instanced geometry shader id - PatchVertices = 82, - TessellationEvaluationPointU = 83, - TessellationEvaluationPointV = 84, - PackedHullInvocationInfo = 85, // contains patch id within the VGT and invocation ID + BaryCoordNoPersp = 79, + BaryCoordNoPerspCentroid = 80, + BaryCoordNoPerspSample = 81, + BaryCoordSmooth = 82, + BaryCoordSmoothCentroid = 83, + BaryCoordSmoothSample = 84, + BaryCoordPullModel = 85, + InvocationId = 86, // TCS id in output patch and instanced geometry shader id + PatchVertices = 87, + TessellationEvaluationPointU = 88, + TessellationEvaluationPointV = 89, + PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID Max, }; -enum class Interpolation { - Invalid = 0, - PerspectiveSample = 1, - PerspectiveCenter = 2, - PerspectiveCentroid = 3, - LinearSample = 4, - LinearCenter = 5, - LinearCentroid = 6, -}; - constexpr size_t NumAttributes = static_cast(Attribute::Max); constexpr size_t NumRenderTargets = 8; constexpr size_t NumParams = 32; @@ -112,13 +109,8 @@ constexpr bool IsMrt(Attribute attribute) noexcept { return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; } -constexpr bool IsLinear(Interpolation interp) noexcept { - return interp >= Interpolation::LinearSample && interp <= 
Interpolation::LinearCentroid; -} - -constexpr bool IsPerspective(Interpolation interp) noexcept { - return interp >= Interpolation::PerspectiveSample && - interp <= Interpolation::PerspectiveCentroid; +constexpr bool IsBarycentricCoord(Attribute attribute) noexcept { + return attribute >= Attribute::BaryCoordSmooth && attribute <= Attribute::BaryCoordSmoothSample; } [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ad36a2e13..d57e18ff0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -10,16 +10,10 @@ namespace Shader { struct Profile { u32 supported_spirv{0x00010000}; u32 subgroup_size{}; - bool unified_descriptor_binding{}; - bool support_descriptor_aliasing{}; bool support_int8{}; bool support_int16{}; bool support_int64{}; bool support_float64{}; - bool support_vertex_instance_id{}; - bool support_float_controls{}; - bool support_separate_denorm_behavior{}; - bool support_separate_rounding_mode{}; bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_flush{}; bool support_fp32_round_to_zero{}; @@ -33,6 +27,9 @@ struct Profile { bool supports_buffer_int64_atomics{}; bool supports_shared_int64_atomics{}; bool supports_workgroup_explicit_memory_layout{}; + bool supports_amd_shader_explicit_vertex_parameter{}; + bool supports_fragment_shader_barycentric{}; + bool has_incomplete_fragment_shader_barycentric{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3c8332c10..4d89c83b2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -137,7 +137,8 @@ GraphicsPipeline::GraphicsPipeline( const vk::PipelineMultisampleStateCreateInfo multisampling = { .rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()), - .sampleShadingEnable = false, + .sampleShadingEnable = + fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, }; const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index c0b138fad..119c0a367 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -273,7 +273,12 @@ bool Instance::CreateDevice() { depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); - fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + amd_shader_explicit_vertex_parameter = + add_extension(VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME); + if (!amd_shader_explicit_vertex_parameter) { + fragment_shader_barycentric = + add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + } legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_instance.h 
b/src/video_core/renderer_vulkan/vk_instance.h index d96abfabe..9be2d9520 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -145,6 +145,11 @@ public: return fragment_shader_barycentric; } + /// Returns true when VK_AMD_shader_explicit_vertex_parameter is supported. + bool IsAmdShaderExplicitVertexParameterSupported() const { + return amd_shader_explicit_vertex_parameter; + } + /// Returns true when VK_EXT_primitive_topology_list_restart is supported. bool IsListRestartSupported() const { return list_restart; @@ -413,6 +418,7 @@ private: u32 queue_family_index{0}; bool custom_border_color{}; bool fragment_shader_barycentric{}; + bool amd_shader_explicit_vertex_parameter{}; bool depth_clip_control{}; bool depth_clip_enable{}; bool depth_range_unrestricted{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 31ede7936..4de8fd73b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -220,6 +220,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(), .supports_workgroup_explicit_memory_layout = instance_.IsWorkgroupMemoryExplicitLayoutSupported(), + .supports_amd_shader_explicit_vertex_parameter = + instance_.IsAmdShaderExplicitVertexParameterSupported(), + .supports_fragment_shader_barycentric = instance_.IsFragmentShaderBarycentricSupported(), + .has_incomplete_fragment_shader_barycentric = + instance_.IsFragmentShaderBarycentricSupported() && + instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || From 6d6068e0e292e7457c3c55802f7dc4cccef1e226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 15 Jul 2025 20:39:37 +0200 Subject: [PATCH 08/12] Fix ff1_i32_b64 not accepting vcc as its argument (#3251) --- .../frontend/translate/scalar_alu.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index e3134c300..bb685d4bf 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" @@ -680,9 +681,18 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) { } void Translator::S_FF1_I32_B64(const GcnInst& inst) { - ASSERT(inst.src[0].field == OperandField::ScalarGPR); - const IR::U32 result{ - ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))}; + const auto src = [&] { + switch (inst.src[0].field) { + case OperandField::ScalarGPR: + return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)); + case OperandField::VccLo: + return ir.GetVcc(); + default: + UNREACHABLE_MSG("unhandled operand type {}", magic_enum::enum_name(inst.src[0].field)); + } + }(); + const IR::U32 result{ir.BallotFindLsb(ir.Ballot(src))}; + SetDst(inst.dst[0], result); } From 
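For reference, S_FF1_I32_B64 yields the bit index of the least-significant set bit of a 64-bit mask (an SGPR pair or, after this fix, VCC), or -1 when the mask is zero; the Ballot/BallotFindLsb sequence above models that. A small scalar sketch of the semantics, independent of the emulator's IR types (illustrative only, not part of the patch):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // Scalar reference for S_FF1_I32_B64: index of the lowest set bit, or -1 if none.
    static int FindFirstOne64(uint64_t mask) {
        return mask == 0 ? -1 : std::countr_zero(mask);
    }

    int main() {
        std::printf("%d\n", FindFirstOne64(0));                    // -1
        std::printf("%d\n", FindFirstOne64(0b1000));               // 3
        std::printf("%d\n", FindFirstOne64(0x8000000000000000u));  // 63
    }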
83475ac8280f9cffd573701abe5b3d3f1f31b3d4 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Tue, 15 Jul 2025 22:55:57 +0300
Subject: [PATCH 09/12] attribute: Correct bary coord function (#3253)

---
 src/shader_recompiler/ir/attribute.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h
index 00ec6c4b3..58f28fb81 100644
--- a/src/shader_recompiler/ir/attribute.h
+++ b/src/shader_recompiler/ir/attribute.h
@@ -110,7 +110,8 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
 }
 
 constexpr bool IsBarycentricCoord(Attribute attribute) noexcept {
-    return attribute >= Attribute::BaryCoordSmooth && attribute <= Attribute::BaryCoordSmoothSample;
+    return attribute >= Attribute::BaryCoordNoPersp &&
+           attribute <= Attribute::BaryCoordSmoothSample;
 }
 
 [[nodiscard]] std::string NameOf(Attribute attribute);

From a82698d60103b561385fb56a148eaa1dd8c12bc2 Mon Sep 17 00:00:00 2001
From: DanielSvoboda
Date: Tue, 15 Jul 2025 18:15:59 -0300
Subject: [PATCH 10/12] qt: fix gui emulatorLanguage (#3250)

---
 src/qt_gui/settings_dialog.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp
index ed2a17e25..f903562f9 100644
--- a/src/qt_gui/settings_dialog.cpp
+++ b/src/qt_gui/settings_dialog.cpp
@@ -437,7 +437,7 @@ void SettingsDialog::LoadValuesFromConfig() {
             toml::find_or(data, "Settings", "consoleLanguage", 6))) %
         languageIndexes.size());
     ui->emulatorLanguageComboBox->setCurrentIndex(
-        languages[toml::find_or(data, "GUI", "emulatorLanguage", "en_US")]);
+        languages[m_gui_settings->GetValue(gui::gen_guiLanguage).toString().toStdString()]);
     ui->hideCursorComboBox->setCurrentIndex(toml::find_or(data, "Input", "cursorState", 1));
     OnCursorStateChanged(toml::find_or(data, "Input", "cursorState", 1));
     ui->idleTimeoutSpinBox->setValue(toml::find_or(data, "Input", "cursorHideTimeout", 5));

From 6e350a50852fc6c39029d383064e2fa32c472168 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Wed, 16 Jul 2025 01:28:03 +0300
Subject: [PATCH 11/12] Avoid clearing HTILE when shader contains address calculation (#3252)

* resource_tracking: Mark image as written when it is used with atomics

* texture_cache: Remove meta registered flag

Mostly useless and it is possible for images to switch metas

* vk_rasterizer: Use xor as heuristic for HTILE clear
---
 src/shader_recompiler/frontend/instruction.h  |  2 --
 src/shader_recompiler/info.h                  |  1 +
 .../ir/passes/resource_tracking_pass.cpp      |  5 +--
 .../ir/passes/shader_info_collection_pass.cpp |  3 ++
 .../renderer_vulkan/vk_rasterizer.cpp         | 19 +++++++---
 src/video_core/texture_cache/image.h          |  7 ++--
 .../texture_cache/texture_cache.cpp           | 36 ++++++++-----------
 src/video_core/texture_cache/texture_cache.h  |  4 +++
 8 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h
index 7c2e0bd1e..f4e7bc9f2 100644
--- a/src/shader_recompiler/frontend/instruction.h
+++ b/src/shader_recompiler/frontend/instruction.h
@@ -3,8 +3,6 @@
 
 #pragma once
 
-#include
-#include "common/bit_field.h"
 #include "shader_recompiler/frontend/opcodes.h"
 
 namespace Shader::Gcn {
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index bb5c88584..11dd9c05e 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -222,6 +222,7 @@ struct Info {
     VAddr pgm_base;
     bool has_storage_images{};
     bool has_discard{};
+    bool has_bitwise_xor{};
     bool
has_image_gather{}; bool has_image_query{}; bool uses_buffer_atomic_float_min_max{}; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e5a4beb8b..2cf39c98e 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -455,11 +455,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& // Read image sharp. const auto tsharp = TrackSharp(tsharp_handle, info); const auto inst_info = inst.Flags(); - const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; + const bool is_atomic = IsImageAtomicInstruction(inst); + const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic; const ImageResource image_res = { .sharp_idx = tsharp, .is_depth = bool(inst_info.is_depth), - .is_atomic = IsImageAtomicInstruction(inst), + .is_atomic = is_atomic, .is_array = bool(inst_info.is_array), .is_written = is_written, .is_r128 = bool(inst_info.is_r128), diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 079827866..8f0e61da2 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -95,6 +95,9 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::DiscardCond: info.has_discard = true; break; + case IR::Opcode::BitwiseXor32: + info.has_bitwise_xor = true; + break; case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: info.has_image_gather = true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b6130e873..c5f894b10 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -503,9 +503,13 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) { return false; } + // Most of the time when a metadata is updated with a shader it gets cleared. It means + // we can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we + // will need its full emulation anyways. const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); - // Assume if a shader reads and writes metas at the same time, it is a copy shader. + // Assume if a shader reads metadata, it is a copy shader. for (const auto& desc : info.buffers) { const VAddr address = desc.GetSharp(info).base_address; if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) { @@ -513,10 +517,15 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) { } } - // Most of the time when a metadata is updated with a shader it gets cleared. It means - // we can skip the whole dispatch and update the tracked state instead. Also, it is not - // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we - // will need its full emulation anyways. + // Metadata surfaces are tiled and thus need address calculation to be written properly. + // If a shader wants to encode HTILE, for example, from a depth image it will have to compute + // proper tile address from dispatch invocation id. This address calculation contains an xor + // operation so use it as a heuristic for metadata writes that are probably not clears. 
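Condensed, the heuristic described in the comment above reads as the following standalone sketch; the flag names mirror the surrounding diff and it is illustrative only, not part of the patch:

    // A dispatch targeting a known metadata address is skipped as a clear only when the
    // shader neither reads metadata nor performs xor-based tile address calculation.
    inline bool LooksLikeMetaClear(bool reads_meta, bool writes_meta, bool has_bitwise_xor) {
        if (reads_meta) {
            return false; // copy/decode shader, must actually run
        }
        if (has_bitwise_xor) {
            return false; // tiled addressing detected, probably an encode rather than a clear
        }
        return writes_meta; // plain store to a metadata surface, assume it is a clear
    }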
+ if (info.has_bitwise_xor) { + return false; + } + + // Assume if a shader writes metadata without address calculation, it is a clear shader. for (const auto& desc : info.buffers) { const VAddr address = desc.GetSharp(info).base_address; if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) { diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 31b67e021..2dbaff053 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -27,10 +27,9 @@ enum ImageFlagBits : u32 { CpuDirty = 1 << 1, ///< Contents have been modified from the CPU GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache) Dirty = MaybeCpuDirty | CpuDirty | GpuDirty, - GpuModified = 1 << 3, ///< Contents have been modified from the GPU - Registered = 1 << 6, ///< True when the image is registered - Picked = 1 << 7, ///< Temporary flag to mark the image as picked - MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered + GpuModified = 1 << 3, ///< Contents have been modified from the GPU + Registered = 1 << 6, ///< True when the image is registered + Picked = 1 << 7, ///< Temporary flag to mark the image as picked }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 723b95892..a6657d8d9 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -508,20 +508,16 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { UpdateImage(image_id); // Register meta data for this color buffer - if (!(image.flags & ImageFlagBits::MetaRegistered)) { - if (desc.info.meta_info.cmask_addr) { - surface_metas.emplace(desc.info.meta_info.cmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::CMask}); - image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; - image.flags |= ImageFlagBits::MetaRegistered; - } + if (desc.info.meta_info.cmask_addr) { + surface_metas.emplace(desc.info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask}); + image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; + } - if (desc.info.meta_info.fmask_addr) { - surface_metas.emplace(desc.info.meta_info.fmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::FMask}); - image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; - image.flags |= ImageFlagBits::MetaRegistered; - } + if (desc.info.meta_info.fmask_addr) { + surface_metas.emplace(desc.info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask}); + image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; } return RegisterImageView(image_id, desc.view_info); @@ -536,15 +532,11 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { UpdateImage(image_id); // Register meta data for this depth buffer - if (!(image.flags & ImageFlagBits::MetaRegistered)) { - if (desc.info.meta_info.htile_addr) { - surface_metas.emplace( - desc.info.meta_info.htile_addr, - MetaDataInfo{.type = MetaDataInfo::Type::HTile, - .clear_mask = image.info.meta_info.htile_clear_mask}); - image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr; - image.flags |= ImageFlagBits::MetaRegistered; - } + if (desc.info.meta_info.htile_addr) { + surface_metas.emplace(desc.info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile, + .clear_mask = image.info.meta_info.htile_clear_mask}); + 
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr; } // If there is a stencil attachment, link depth and stencil. diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ff8ffb61c..9a9679c0a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -161,10 +161,12 @@ public: /// Registers an image view for provided image ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); + /// Returns true if the specified address is a metadata surface. bool IsMeta(VAddr address) const { return surface_metas.contains(address); } + /// Returns true if a slice of the specified metadata surface has been cleared. bool IsMetaCleared(VAddr address, u32 slice) const { const auto& it = surface_metas.find(address); if (it != surface_metas.end()) { @@ -173,6 +175,7 @@ public: return false; } + /// Clears all slices of the specified metadata surface. bool ClearMeta(VAddr address) { auto it = surface_metas.find(address); if (it != surface_metas.end()) { @@ -182,6 +185,7 @@ public: return false; } + /// Updates the state of a slice of the specified metadata surface. bool TouchMeta(VAddr address, u32 slice, bool is_clear) { auto it = surface_metas.find(address); if (it != surface_metas.end()) { From cf8a6efd3716ba9130e221767a88d2e542241005 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Wed, 16 Jul 2025 01:54:56 +0300 Subject: [PATCH 12/12] liverpool_to_vk: Don't use remapped format for clear value (#3254) --- src/video_core/amdgpu/liverpool.h | 1 - src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c517285fb..d693a0a38 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1009,7 +1009,6 @@ struct Liverpool { return RemapSwizzle(info.format, mrt_swizzle); } - private: [[nodiscard]] NumberFormat GetFixedNumberFormat() const { // There is a small difference between T# and CB number types, account for it. return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index fd1a91260..cd597e16c 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -807,8 +807,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { const auto comp_swizzle = color_buffer.Swizzle(); - const auto format = color_buffer.GetDataFmt(); - const auto number_type = color_buffer.GetNumberFmt(); + const auto format = color_buffer.info.format.Value(); + const auto number_type = color_buffer.GetFixedNumberFormat(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1;
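The change above decodes the clear words with the color buffer's original register format rather than the remapped one, presumably because the packed layout of CLEAR_WORD0/1 follows the native format and any remapping belongs after the raw channels are extracted. A hypothetical sketch for an 8_8_8_8 UNORM layout (format choice, helper name, and values are assumptions for illustration, not emulator API):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Unpack CLEAR_WORD0 for an 8_8_8_8 UNORM color buffer. The shifts and widths are tied
    // to the buffer's original format; deriving them from a remapped format could scale or
    // order the channels incorrectly.
    static std::array<float, 4> DecodeClearWord8888Unorm(uint32_t word) {
        const auto chan = [word](int shift) { return float((word >> shift) & 0xFFu) / 255.0f; };
        return {chan(0), chan(8), chan(16), chan(24)};
    }

    int main() {
        const auto rgba = DecodeClearWord8888Unorm(0x80FF4000u);
        std::printf("r=%.3f g=%.3f b=%.3f a=%.3f\n", rgba[0], rgba[1], rgba[2], rgba[3]);
    }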