diff --git a/CMakeLists.txt b/CMakeLists.txt index b057f55d6..ecab3e02d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/constant_propagation_pass.cpp src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp + src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/patch.cpp + src/shader_recompiler/ir/patch.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 23800fc49..9548cd5b0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include #include @@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.VectorReg(); } else if constexpr (std::is_same_v) { return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } + UNREACHABLE(); } template @@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) { + switch (primitive) { + case AmdGpu::TessellationType::Isoline: + return spv::ExecutionMode::Isolines; + case AmdGpu::TessellationType::Triangle: + return spv::ExecutionMode::Triangles; + case AmdGpu::TessellationType::Quad: + return spv::ExecutionMode::Quads; + } + UNREACHABLE_MSG("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { + switch (spacing) { + case AmdGpu::TessellationPartitioning::Integer: + return spv::ExecutionMode::SpacingEqual; + case AmdGpu::TessellationPartitioning::FracOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case AmdGpu::TessellationPartitioning::FracEven: + return spv::ExecutionMode::SpacingFractionalEven; + default: + break; + } + UNREACHABLE_MSG("Tessellation spacing {}", spacing); +} + void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); @@ -244,11 +273,12 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } - if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + const auto stage = info.l_stage; + if (stage == LogicalStage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.stage == Stage::Geometry) { + if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { @@ -257,23 +287,39 @@ void SetupCapabilities(const Info& 
info, const Profile& profile, EmitContext& ct } } -void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { - const auto& info = program.info; +void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.info.stage) { - case Stage::Compute: { + switch (info.l_stage) { + case LogicalStage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Stage::Export: - case Stage::Vertex: + case LogicalStage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; - case Stage::Fragment: + case LogicalStage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.hs_info.output_control_points); + break; + case LogicalStage::TessellationEval: { + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + const auto& vs_info = ctx.runtime_info.vs_info; + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, + vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw + ? spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); + break; + } + case LogicalStage::Fragment: execution_model = spv::ExecutionModel::Fragment; if (ctx.profile.lower_left_origin_mode) { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); @@ -288,7 +334,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; - case Stage::Geometry: + case LogicalStage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); ctx.AddExecutionMode(main, @@ -299,7 +345,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.runtime_info.gs_info.num_invocations); break; default: - throw NotImplementedException("Stage {}", u32(program.info.stage)); + UNREACHABLE_MSG("Stage {}", u32(info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -345,7 +391,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); + DefineEntryPoint(program.info, ctx, main); SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d005169c4..dde7b4806 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include 
"shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/ir/patch.h" #include @@ -242,8 +243,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { } return coord; } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); default: - throw NotImplementedException("Read attribute {}", attr); + UNREACHABLE_MSG("Read attribute {}", attr); } } @@ -269,7 +276,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { ASSERT(ctx.info.stage == Stage::Geometry); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); default: - throw NotImplementedException("Read U32 attribute {}", attr); + UNREACHABLE_MSG("Read U32 attribute {}", attr); } } @@ -287,6 +294,42 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen } } +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + const Id type{ctx.stage == Stage::Hull ? ctx.output_f32 : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)); + default: + UNREACHABLE_MSG("Patch {}", u32(patch)); + } + }()}; + ctx.OpStore(pointer, value); +} + template static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 4ff53670e..27f8e5a91 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -9,6 +9,7 @@ namespace Shader::IR { enum class Attribute : u64; enum class ScalarReg : u32; +enum class Patch : u64; class Inst; class Value; } // namespace Shader::IR @@ -88,6 +89,8 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch 
patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 1ada2f1f9..874081fc9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -34,7 +34,7 @@ std::string_view StageName(Stage stage) { case Stage::Compute: return "cs"; } - throw InvalidArgument("Invalid stage {}", u32(stage)); + UNREACHABLE_MSG("Invalid hw stage {}", u32(stage)); } static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { @@ -65,7 +65,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, - profile{profile_}, stage{info.stage}, binding{binding_} { + profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -268,9 +268,8 @@ void EmitContext::DefineInputs() { U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); @@ -311,12 +310,11 @@ void EmitContext::DefineInputs() { } input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); - interfaces.push_back(id); } } break; } - case Stage::Fragment: + case LogicalStage::Fragment: frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -351,15 +349,14 @@ void EmitContext::DefineInputs() { } input_params[semantic] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); - interfaces.push_back(attr_id); } break; - case Stage::Compute: + case LogicalStage::Compute: workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input); local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; - case Stage::Geometry: { + case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), @@ -389,15 +386,18 @@ void EmitContext::DefineInputs() { } break; } + case LogicalStage::TessellationEval: { + tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); + break; + } default: break; } } void EmitContext::DefineOutputs() { - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, 
spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || info.stores.Get(IR::Attribute::Position2) || @@ -419,11 +419,33 @@ void EmitContext::DefineOutputs() { Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); - interfaces.push_back(id); } break; } - case Stage::Fragment: + case LogicalStage::TessellationControl: { + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], ConstU32(4U))}; + output_tess_level_outer = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], ConstU32(2U))}; + output_tess_level_inner = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineOutput(F32[4], index)}; + Decorate(id, spv::Decoration::Patch); + patches[index] = id; + } + break; + } + case LogicalStage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -435,22 +457,22 @@ void EmitContext::DefineOutputs() { const Id id{DefineOutput(type, i)}; Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); - interfaces.push_back(id); } break; - case Stage::Geometry: { + case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; - interfaces.push_back(id); } break; } - default: + case LogicalStage::Compute: break; + default: + UNREACHABLE(); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index cd1293328..ea2ca725f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -46,14 +46,18 @@ public: void DefineBufferOffsets(); void DefineInterpolatedAttribs(); - [[nodiscard]] Id DefineInput(Id type, u32 location) { - const Id input_id{DefineVar(type, spv::StorageClass::Input)}; - Decorate(input_id, spv::Decoration::Location, location); + [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)}; + if (location) { + Decorate(input_id, spv::Decoration::Location, *location); + } return input_id; } - [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt) { - const Id output_id{DefineVar(type, spv::StorageClass::Output)}; + [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)}; if (location) { Decorate(output_id, spv::Decoration::Location, *location); } @@ -131,7 +135,8 @@ public: const Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - Stage stage{}; + Stage stage; + LogicalStage l_stage{}; Id void_id{}; Id U8{}; 
@@ -188,6 +193,11 @@ public: Id clip_distances{}; Id cull_distances{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + Id tess_coord; + std::array patches{}; + Id workgroup_id{}; Id local_invocation_id{}; Id subgroup_local_invocation_id{}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 5914f9fe3..be5bf273e 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 97978ff6b..d9e92cb78 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -34,9 +34,8 @@ void Translator::EmitPrologue() { } IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (info.stage) { - case Stage::Vertex: - case Stage::Export: + switch (info.l_stage) { + case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -122,7 +121,17 @@ void Translator::EmitPrologue() { } } break; - case Stage::Compute: + case LogicalStage::TessellationControl: + ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::TessellationEval: + ir.SetVectorReg(IR::VectorReg::V0, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); + ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); @@ -137,7 +146,7 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; - case Stage::Geometry: + case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 @@ -152,7 +161,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: - throw NotImplementedException("Unknown shader stage"); + UNREACHABLE_MSG("Unknown shader stage"); } } @@ -503,7 +512,8 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. 
if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || + info.stage == Stage::Local); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f320a6c7..069e2908c 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -217,7 +217,7 @@ public: // VOP3a void V_MAD_F32(const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); void V_MAD_U32_U24(const GcnInst& inst); void V_CUBEID_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3e9e677a7..2b32ca2ce 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + IR::U32 src1{GetSrc(inst.src[1])}; + IR::U32 src2{GetSrc(inst.src[2])}; + if (!src1.IsImmediate()) { + src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F)); + } + if (!src2.IsImmediate()) { + src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F)); + } SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 74b9c905d..636a473d1 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -187,7 +187,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -245,7 +246,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; - if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) { ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); } @@ -267,7 +268,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); diff --git a/src/shader_recompiler/info.h 
b/src/shader_recompiler/info.h index 494bbb4bb..53db1b5b0 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -163,6 +163,7 @@ struct Info { UserDataMask ud_mask{}; CopyShaderData gs_copy_data; + u32 uses_patches{}; BufferResourceList buffers; TextureBufferResourceList texture_buffers; @@ -175,6 +176,7 @@ struct Info { std::span user_data; Stage stage; + LogicalStage l_stage; u64 pgm_hash{}; VAddr pgm_base; @@ -190,14 +192,16 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; - explicit Info(Stage stage_, ShaderParams params) - : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} template diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e219dfb64..503144782 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -114,6 +114,10 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPointU"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPointV"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0890e88f1..9b68fd119 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -74,6 +74,8 @@ enum class Attribute : u64 { FragCoord = 77, InstanceId0 = 78, // step rate 0 InstanceId1 = 79, // step rate 1 + TessellationEvaluationPointU = 80, + TessellationEvaluationPointV = 81, Max, }; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index b4d1a78c7..a312eabde 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map& inst_to_index, size return fmt::format("{}", arg.VectorReg()); case Type::Attribute: return fmt::format("{}", arg.Attribute()); + case Type::Patch: + return fmt::format("{}", arg.Patch()); default: return ""; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 29b406699..dda247050 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -278,6 +278,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); } +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { case 32: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f77e22b82..d13c6e935 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -11,6 +11,7 @@ #include "shader_recompiler/ir/basic_block.h" #include 
"shader_recompiler/ir/condition.h" #include "shader_recompiler/ir/value.h" +#include "shader_recompiler/ir/patch.h" namespace Shader::IR { @@ -84,6 +85,9 @@ public: [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 9b4ad63d2..7f36f44d7 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -52,6 +52,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: + case Opcode::SetPatch: case Opcode::StoreBufferU32: case Opcode::StoreBufferU32x2: case Opcode::StoreBufferU32x3: diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index be640297a..cd73ace7e 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque}; constexpr Type ScalarReg{Type::ScalarReg}; constexpr Type VectorReg{Type::VectorReg}; constexpr Type Attribute{Type::Attribute}; -constexpr Type SystemValue{Type::SystemValue}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 8f40ed985..6004a03b5 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,6 +60,8 @@ OPCODE(SetGotoVariable, Void, U32, OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp new file mode 100644 index 000000000..bd1094792 --- /dev/null +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -0,0 +1,157 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#pragma clang optimize off +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +/** + * Tessellation shaders pass outputs to the next shader using LDS. + * The Hull shader stage receives input control points stored in LDS. + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 + * - ... + * - TCS outputs for patch 0 + * - TCS outputs for patch 1 + * - TCS outputs for patch 2 + * - ... + * - Per-patch TCS outputs for patch 0 + * - Per-patch TCS outputs for patch 1 + * - Per-patch TCS outputs for patch 2 + * + * If the Hull stage does not write any new control points the driver will + * optimize LDS layout so input and output control point spaces overlap. + * + * Tessellation factors are stored in the per-patch TCS output block + * as well as a factor V# that is automatically bound by the driver. 
+ *
+ * This pass attempts to resolve LDS accesses to attribute accesses and correctly
+ * write to the tessellation factor tables. For the latter we replace the
+ * buffer store instructions with tessellation factor writes, selected according to their offset.
+ *
+ * LDS stores can either be output control point writes or per-patch data writes.
+ * This is detected by looking at how the address is formed. In any case the calculation
+ * will be of the form a * b + c. For output control points a = output_control_point_id,
+ * while for per-patch writes a = patch_id.
+ *
+ * Both patch_id and output_control_point_id are packed in VGPR1 by the driver, and the shader
+ * uses V_BFE_U32 to extract them. We use the starting bit_pos to determine which is which.
+ *
+ * LDS reads are trickier, as the number of different calculations performed can vary.
+ * The final result, if the output control point space is distinct, is of the form:
+ * patch_id * input_control_point_stride * num_control_points_per_input_patch + a
+ * The value "a" can be anything in the range [0, input_control_point_stride].
+ *
+ * This pass does not attempt to deduce the exact attribute referenced by "a"; it only uses
+ * "a" itself to index into the input attributes. Those are defined as an array in the shader:
+ * layout (location = 0) in vec4[num_control_points_per_input_patch] attr[];
+ * ...
+ * float value = attr[a / in_stride][(a % in_stride) >> 4][(a & 0xF) >> 2];
+ *
+ * This requires knowing in_stride, which is not provided to us by the guest.
+ * To deduce it we perform a breadth-first search on the arguments of a DS_READ*,
+ * looking for a buffer load with offset = 0. This will be the buffer holding the tessellation
+ * constants, and it contains the value of in_stride that we can read at compile time.
+ *
+ * NOTE: This pass must be run before constant propagation, as it relies on relatively specific
+ * pattern matching that might be mutated by that optimization pass.
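+ *
+ * As a recap of the attribute indexing above with a worked example (in_stride = 0x40 bytes is
+ * an assumption for illustration only): for a = 0x24,
+ *     a / in_stride        = 0x24 / 0x40 = 0  -> input control point 0 of the patch
+ *     (a % in_stride) >> 4 = 0x24 >> 4    = 2  -> vec4 attribute 2 of that control point
+ *     (a & 0xF) >> 2       = 0x04 >> 2    = 1  -> component .y of that attribute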
+ * + */ + +void HullShaderTransform(const IR::Program& program) { + LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program)); + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: { + const auto info = inst.Flags(); + if (!info.globally_coherent) { + break; + } + const auto GetValue = [&](IR::Value data) -> IR::F32 { + if (auto* inst = data.TryInstRecursive(); + inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) { + return IR::F32{inst->Arg(0)}; + } + return ir.BitCast(IR::U32{data}); + }; + const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1; + const auto factor_idx = info.inst_offset.Value() >> 2; + const IR::Value data = inst.Arg(2); + inst.Invalidate(); + if (num_dwords == 1) { + ir.SetPatch(IR::PatchFactor(factor_idx), GetValue(data)); + break; + } + auto* inst = data.TryInstRecursive(); + ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4)); + for (s32 i = 0; i < num_dwords; i++) { + ir.SetPatch(IR::PatchFactor(factor_idx + i), GetValue(inst->Arg(i))); + } + break; + } + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: { + const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + const IR::Value data = inst.Arg(1); + const auto [data_lo, data_hi] = [&] -> std::pair { + if (num_dwords == 1) { + return {IR::U32{data}, IR::U32{}}; + } + const auto* prod = data.InstRecursive(); + return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}}; + }(); + const IR::Inst* ds_offset = inst.Arg(0).InstRecursive(); + const u32 offset_dw = ds_offset->Arg(1).U32() >> 4; + IR::Inst* prod = ds_offset->Arg(0).TryInstRecursive(); + ASSERT(prod && (prod->GetOpcode() == IR::Opcode::IAdd32 || + prod->GetOpcode() == IR::Opcode::IMul32)); + if (prod->GetOpcode() == IR::Opcode::IAdd32) { + prod = prod->Arg(0).TryInstRecursive(); + ASSERT(prod && prod->GetOpcode() == IR::Opcode::IMul32); + } + prod = prod->Arg(0).TryInstRecursive(); + ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldSExtract && + prod->Arg(2).IsImmediate() && prod->Arg(2).U32() == 24); + prod = prod->Arg(0).TryInstRecursive(); + ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldUExtract); + const u32 bit_pos = prod->Arg(1).U32(); + const auto SetOutput = [&ir](IR::U32 value, u32 offset_dw, bool is_patch_const) { + const IR::F32 data = ir.BitCast(value); + if (!is_patch_const) { + const u32 param = offset_dw >> 2; + const u32 comp = offset_dw & 3; + ir.SetAttribute(IR::Attribute::Param0 + param, data, comp); + } else { + ir.SetPatch(IR::PatchGeneric(offset_dw), data); + } + }; + ASSERT_MSG(bit_pos == 0 || bit_pos == 8, "Unknown bit extract pos {}", bit_pos); + const bool is_patch_const = bit_pos == 0; + SetOutput(data_lo, offset_dw, is_patch_const); + if (num_dwords > 1) { + SetOutput(data_hi, offset_dw + 1, is_patch_const); + } + inst.Invalidate(); + break; + } + default: + break; + } + } + } + LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program)); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7bd47992c..3cb5e11a3 
100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -18,5 +18,6 @@ void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage); +void HullShaderTransform(const IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index eb1be2967..345bdbf31 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -23,12 +23,43 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }; switch (stage) { + case Stage::Local: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::WriteSharedU64: { + u32 offset = 0; + const auto* addr = inst.Arg(0).InstRecursive(); + if (addr->GetOpcode() == IR::Opcode::IAdd32) { + ASSERT(addr->Arg(1).IsImmediate()); + offset = addr->Arg(1).U32(); + } + const IR::Inst* pair = inst.Arg(1).InstRecursive(); + for (s32 i = 0; i < 2; i++) { + const auto attrib = IR::Attribute::Param0 + (offset / 16); + const auto comp = (offset / 4) % 4; + const IR::U32 value = IR::U32{pair->Arg(i)}; + ir.SetAttribute(attrib, ir.BitCast(value), comp); + offset += 4; + } + inst.Invalidate(); + break; + } + case IR::Opcode::WriteSharedU32: + UNREACHABLE(); + default: + break; + } + }); + break; + } case Stage::Export: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -61,7 +92,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::LoadBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -80,7 +112,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8b93d72e3..c34b59b88 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::GetUserData: info.ud_mask.Set(inst.Arg(0).ScalarReg()); break; + case IR::Opcode::SetPatch: { + const auto patch = inst.Arg(0).Patch(); + if (patch <= IR::Patch::TessellationLodBottom) { + info.stores_tess_level_outer = true; + } else if (patch <= IR::Patch::TessellationLodInteriorV) { + info.stores_tess_level_inner = true; + } else { + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + } + break; + } + case IR::Opcode::GetPatch: { + const auto patch = inst.Arg(0).Patch(); + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + break; + } case IR::Opcode::LoadSharedU32: case 
IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/patch.cpp b/src/shader_recompiler/ir/patch.cpp new file mode 100644 index 000000000..2485bc5b4 --- /dev/null +++ b/src/shader_recompiler/ir/patch.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/patch.h" + +namespace Shader::IR { + +std::string NameOf(Patch patch) { + switch (patch) { + case Patch::TessellationLodLeft: + return "TessellationLodLeft"; + case Patch::TessellationLodTop: + return "TessellationLodTop"; + case Patch::TessellationLodRight: + return "TessellationLodRight"; + case Patch::TessellationLodBottom: + return "TessellationLodBottom"; + case Patch::TessellationLodInteriorU: + return "TessellationLodInteriorU"; + case Patch::TessellationLodInteriorV: + return "TessellationLodInteriorV"; + default: + const u32 index = u32(patch) - u32(Patch::Component0); + return fmt::format("Component{}", index); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/patch.h b/src/shader_recompiler/ir/patch.h new file mode 100644 index 000000000..65d2192e6 --- /dev/null +++ b/src/shader_recompiler/ir/patch.h @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 125); + +constexpr bool 
IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +constexpr Patch PatchFactor(u32 index) { + return static_cast(index); +} + +constexpr Patch PatchGeneric(u32 index) { + return static_cast(static_cast(Patch::Component0) + index); +} + +constexpr u32 GenericPatchIndex(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +constexpr u32 GenericPatchElement(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +[[nodiscard]] std::string NameOf(Patch patch); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Patch patch, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch)); + } +}; diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index ca2e9ceb9..19e0da3dd 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -49,7 +49,8 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; - BitField<14, 1, u32> ring_access; // global + system coherency + BitField<14, 1, u32> system_coherent; + BitField<15, 1, u32> globally_coherent; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h index ec855a77e..0f043fb64 100644 --- a/src/shader_recompiler/ir/type.h +++ b/src/shader_recompiler/ir/type.h @@ -15,7 +15,7 @@ enum class Type { ScalarReg = 1 << 1, VectorReg = 1 << 2, Attribute = 1 << 3, - SystemValue = 1 << 4, + Patch = 1 << 4, U1 = 1 << 5, U8 = 1 << 6, U16 = 1 << 7, diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index 889e99556..8826b80f2 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index dbe8b5cc4..49d85fc28 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -18,6 +18,7 @@ #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" +#include "shader_recompiler/ir/patch.h" namespace Shader::IR { @@ -34,6 +35,7 @@ public: explicit Value(IR::ScalarReg reg) noexcept; explicit Value(IR::VectorReg reg) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch patch) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -56,6 +58,7 @@ public: [[nodiscard]] IR::ScalarReg ScalarReg() const; [[nodiscard]] IR::VectorReg VectorReg() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -75,6 +78,7 @@ private: IR::ScalarReg sreg; IR::VectorReg vreg; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; @@ -330,6 +334,11 @@ inline IR::Attribute 
Value::Attribute() const { return attribute; } +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + inline bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 64f842c42..aee69f73b 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -60,9 +60,14 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + const auto stage = program.info.stage; Shader::Optimization::SsaRewritePass(program.post_order_blocks); + if (stage == Stage::Hull) { + Shader::Optimization::HullShaderTransform(program); + } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - if (program.info.stage != Stage::Compute) { + Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4c779a368..32b4f3ed9 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -23,6 +23,15 @@ enum class Stage : u32 { }; constexpr u32 MaxStageTypes = 7; +enum class LogicalStage : u32 { + Fragment, + Vertex, + TessellationControl, + TessellationEval, + Geometry, + Compute, +}; + [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } @@ -64,12 +73,23 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one; + return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + tess_type == other.tess_type && tess_topology == other.tess_topology && + tess_partitioning == other.tess_partitioning; } }; +struct HullRuntimeInfo { + u32 output_control_points; + + auto operator<=>(const HullRuntimeInfo&) const noexcept = default; +}; + static constexpr auto GsMaxOutputStreams = 4u; using GsOutputPrimTypes = std::array; struct GeometryRuntimeInfo { @@ -152,6 +172,7 @@ struct RuntimeInfo { union { ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + HullRuntimeInfo hs_info; GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -174,6 +195,8 @@ struct RuntimeInfo { return es_info == other.es_info; case Stage::Geometry: return gs_info == other.gs_info; + case Stage::Hull: + return hs_info == other.hs_info; default: return true; } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9bc3454d8..16f22ec13 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -956,6 +956,7 @@ struct Liverpool { enum VgtStages : u32 { Vs = 0u, // always enabled EsGs = 0xB0u, + LsHs = 0x45u, }; VgtStages raw; @@ -1059,6 +1060,20 @@ struct Liverpool { }; }; + union LsHsConfig { + u32 raw; + BitField<0, 8, u32> num_patches; + BitField<8, 6, u32> hs_input_control_points; + BitField<14, 6, u32> 
hs_output_control_points; + }; + + union TessellationConfig { + u32 raw; + BitField<0, 2, TessellationType> type; + BitField<2, 3, TessellationPartitioning> partitioning; + BitField<5, 3, TessellationTopology> topology; + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1200,9 +1215,10 @@ struct Liverpool { BitField<0, 11, u32> vgt_gs_max_vert_out; INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(1); + LsHsConfig ls_hs_config; u32 vgt_gs_vert_itemsize[4]; - INSERT_PADDING_WORDS(4); + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); PolygonOffset poly_offset; GsInstances vgt_gs_instance_cnt; StreamOutConfig vgt_strmout_config; @@ -1445,6 +1461,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 6b95ed910..4bffb9ce8 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -21,6 +21,26 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 795537574..98c283fb8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -52,7 +52,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; if (fetch_shader && !instance.IsVertexInputDynamicState()) { - const auto& vs_info = GetStage(Shader::Stage::Vertex); + const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader @@ -204,7 +204,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector shader_stages; - auto stage = u32(Shader::Stage::Vertex); + auto stage = u32(LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -212,7 +212,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Geometry); + stage = u32(LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -220,7 +220,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = 
"main", }); } - stage = u32(Shader::Stage::Fragment); + stage = u32(LogicalStage::TessellationControl); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationControl, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(LogicalStage::TessellationEval); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 703a0680e..99588cb3b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -73,7 +73,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0fa77e19b..87f13010d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -22,6 +22,8 @@ extern std::unique_ptr presenter; namespace Vulkan { +using Shader::LogicalStage; +using Shader::Stage; using Shader::VsOutput; constexpr static std::array DescriptorHeapSizes = { @@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, : (ctl.IsCullDistEnabled(7) ? 
VsOutput::CullDist7 : VsOutput::None)); } -Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { +Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; const auto BuildCommon = [&](const auto& program) { @@ -89,20 +91,34 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { info.fp_round_mode32 = program.settings.fp_round_mode32; }; switch (stage) { - case Shader::Stage::Export: { + case Stage::Local: { + BuildCommon(regs.ls_program); + break; + } + case Stage::Hull: { + BuildCommon(regs.hs_program); + info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value(); + break; + } + case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } - case Shader::Stage::Vertex: { + case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + if (l_stage == LogicalStage::TessellationEval) { + info.vs_info.tess_type = regs.tess_config.type; + info.vs_info.tess_topology = regs.tess_config.topology; + info.vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } - case Shader::Stage::Geometry: { + case Stage::Geometry: { BuildCommon(regs.gs_program); auto& gs_info = info.gs_info; gs_info.output_vertices = regs.vgt_gs_max_vert_out; @@ -121,7 +137,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } - case Shader::Stage::Fragment: { + case Stage::Fragment: { BuildCommon(regs.ps_program); info.fs_info.en_flags = regs.ps_input_ena; info.fs_info.addr_flags = regs.ps_input_addr; @@ -143,7 +159,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } break; } - case Shader::Stage::Compute: { + case Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; @@ -304,7 +320,7 @@ bool PipelineCache::RefreshGraphicsKey() { fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -331,23 +347,23 @@ bool PipelineCache::RefreshGraphicsKey() { auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, - key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, stage_out, params, binding); if (fetch_shader_) { fetch_shader = fetch_shader_; } return true; }; - const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; - const auto& IsGsFeaturesSupported = [&]() -> bool { // These checks are temporary until all functionality is implemented. 
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; }; - TryBindStage(Shader::Stage::Fragment); + infos.fill(nullptr); + TryBindStage(Stage::Fragment, LogicalStage::Fragment); - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; switch (regs.stage_enable.raw) { @@ -355,22 +371,36 @@ bool PipelineCache::RefreshGraphicsKey() { if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { return false; } - if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { + if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { return false; } - if (!TryBindStage(Shader::Stage::Geometry)) { + if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + } + case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + if (!instance.IsTessellationSupported()) { + break; + } + if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { + return false; + } + if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { return false; } break; } default: { - TryBindStage(Shader::Stage::Vertex); - infos[static_cast(Shader::Stage::Geometry)] = nullptr; + TryBindStage(Stage::Vertex, LogicalStage::Vertex); break; } } - const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + const auto vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { @@ -430,7 +460,7 @@ bool PipelineCache::RefreshComputeKey() { const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = - GetProgram(Shader::Stage::Compute, cs_params, binding); + GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; } @@ -466,13 +496,13 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, return module; } -std::tuple, u64> -PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding) { - const auto runtime_info = BuildRuntimeInfo(stage); +PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, + Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { + const auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - it_pgm.value() = std::make_unique(stage, params); + it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); @@ -491,7 +521,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = Shader::Info(stage, params); + auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c5c2fc98e..d5170dd15 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -34,11 +34,13 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; + using ModuleList = boost::container::small_vector; Shader::Info info; - boost::container::small_vector modules; + ModuleList modules; - explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) + : info{stage, l_stage, params} {} void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); @@ -55,10 +57,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple, - u64> - GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding); + using Result = std::tuple, u64>; + Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, + Shader::ShaderParams params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -74,7 +76,7 @@ private: vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding); - Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); + Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: const Instance& instance;
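Note (reviewer commentary, not part of the patch): the RefreshGraphicsKey changes above remap hardware (VGT) shader stages onto the logical pipeline stages that the recompiler and Vulkan see. The sketch below summarizes that mapping for the stage-enable configurations handled in the switch; the enumerator spellings are taken from their uses in the diff, while the declaration order and the MapStages helper are assumptions made for illustration.

// Illustrative sketch only; not code from the patch.
#include <vector>

enum class Stage { Vertex, Export, Local, Hull, Geometry, Fragment, Compute };  // hardware stage (assumed order)
enum class LogicalStage { Vertex, TessellationControl, TessellationEval,
                          Geometry, Fragment, Compute };                        // logical stage (assumed order)

struct StageBinding {
    Stage hw;             // which program registers the shader binary comes from
    LogicalStage logical; // which pipeline slot / Vulkan stage the module is bound to
};

// Fragment is always bound Fragment -> Fragment before the switch runs.
// EsGs:  the ES program is compiled as the logical vertex shader, GS stays GS.
// LsHs:  LS becomes the logical vertex shader, HS the tessellation control
//        shader, and the VS program runs as tessellation evaluation (domain).
// Other: the VS program is simply the logical vertex shader.
std::vector<StageBinding> MapStages(bool es_gs, bool ls_hs) {
    if (es_gs) {
        return {{Stage::Export, LogicalStage::Vertex}, {Stage::Geometry, LogicalStage::Geometry}};
    }
    if (ls_hs) {
        return {{Stage::Local, LogicalStage::Vertex},
                {Stage::Hull, LogicalStage::TessellationControl},
                {Stage::Vertex, LogicalStage::TessellationEval}};
    }
    return {{Stage::Vertex, LogicalStage::Vertex}};
}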
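Related side note (a general Vulkan requirement; the corresponding hunk is not visible in this excerpt): once tessellation control/evaluation modules are appended to shader_stages, the graphics pipeline must also carry a vk::PipelineTessellationStateCreateInfo. A minimal sketch, with patch_control_points standing in for whatever value the patch derives from the LS-HS configuration registers:

// Sketch only: how tessellation state plugs into pipeline creation.
// patch_control_points is a placeholder; its actual source is not shown here.
const vk::PipelineTessellationStateCreateInfo tessellation_state{
    .patchControlPoints = patch_control_points,
};
const vk::GraphicsPipelineCreateInfo pipeline_info{
    .stageCount = static_cast<u32>(shader_stages.size()),
    .pStages = shader_stages.data(),
    .pTessellationState = &tessellation_state,
    // remaining fixed-function state omitted
};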
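Finally, a minimal sketch of a call site after the GetProgram signature change, here for the hull-shader case. The local names are assumptions; the patch's real call sites are the TryBindStage lambda and RefreshComputeKey above.

// Sketch of the new calling convention (assumed locals, not from the patch):
// the hardware stage selects the program registers to read, the logical stage
// selects the slot (and Vulkan stage) the compiled module is bound to.
Shader::Backend::Bindings binding{};
const auto params = Liverpool::GetParams(liverpool->regs.hs_program);
const auto [info, module, fetch_shader, hash] =
    GetProgram(Shader::Stage::Hull, Shader::LogicalStage::TessellationControl, params, binding);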