diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 7802977f5..082ce4221 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -62,7 +62,8 @@ private: class FileBackend { public: explicit FileBackend(const std::filesystem::path& filename) - : file{filename, FS::FileAccessMode::Write, FS::FileType::TextFile} {} + : file{std::filesystem::path("/dev/null"), FS::FileAccessMode::Write, + FS::FileType::TextFile} {} ~FileBackend() = default; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9548cd5b0..39e5169d4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/runtime_info.h" #pragma clang optimize off #include #include @@ -285,6 +286,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { @@ -309,7 +313,6 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { break; case LogicalStage::TessellationEval: { execution_model = spv::ExecutionModel::TessellationEvaluation; - ctx.AddCapability(spv::Capability::Tessellation); const auto& vs_info = ctx.runtime_info.vs_info; ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 
22b3523aa..7a4048bae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -34,4 +34,15 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { MemoryBarrier(ctx, spv::Scope::Device); } +// Control barrier for tessellation control shaders: Workgroup execution scope, Invocation +// memory scope and no memory semantics (MaskNone). +void EmitTcsOutputBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Invocation}; + const auto memory_semantics{spv::MemorySemanticsMask::MaskNone}; + ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)), + ctx.ConstU32(static_cast<u32>(memory)), + ctx.ConstU32(static_cast<u32>(memory_semantics))); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index dde7b4806..f1e173371 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/runtime_info.h" +#pragma clang optimize off #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/ir/patch.h" @@ -273,8 +276,21 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == 
LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: + // TODO figure out what to do with this + // should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId + return ctx.u32_zero_value; default: UNREACHABLE_MSG("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 27f8e5a91..29ffb916a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -28,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -53,6 +51,7 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitTcsOutputBarrier(EmitContext& ctx); Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); void EmitGetThreadBitScalarReg(EmitContext& ctx); void EmitSetThreadBitScalarReg(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 874081fc9..81376c4f0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -386,6 +386,13 @@ void EmitContext::DefineInputs() { } 
break; } + case LogicalStage::TessellationControl: { // both built-ins are scalar u32 + invocation_id = + DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + break; + } case LogicalStage::TessellationEval: { tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index ea2ca725f..497aa1d0f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -193,6 +193,7 @@ public: Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; Id output_tess_level_outer{}; Id output_tess_level_inner{}; Id tess_coord; @@ -200,6 +201,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index be5bf273e..2c3cd167f 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/runtime_info.h" #pragma clang optimize off #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" @@ -73,10 +74,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { 
SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83e..549464580 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,6 +80,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE_I32(inst); case Opcode::S_BFE_U32: return S_BFE_U32(inst); case Opcode::S_ABSDIFF_I32: @@ -444,6 +448,19 @@ void Translator::S_BFE_U32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_BFE_I32(const GcnInst& inst) { + // Signed bit-field extract. Per the GCN ISA, src1 packs the field descriptor: bits [4:0] are + // the bit offset and bits [22:16] are the field width. + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 offset{ir.BitFieldExtract(src1, ir.Imm32(0), ir.Imm32(5), false)}; + const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7), false)}; + // The final `true` requests a signed (sign-extending) extract of the field. + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, true)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_ABSDIFF_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index d9e92cb78..84f79bb5f 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp 
+++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -51,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -121,15 +123,28 @@ void Translator::EmitPrologue() { } } break; - case LogicalStage::TessellationControl: - ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + case LogicalStage::TessellationControl: { + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // Test + // ir.SetPatch(IR::Patch::TessellationLodLeft, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodTop, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodRight, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodBottom, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodInteriorU, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodInteriorV, ir.Imm32(1.0f)); break; + } case LogicalStage::TessellationEval: ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); - ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + // I think V2 is actually the patch id within the patches running on the local CU, used in + // compiler generated address calcs, + // and V3 is the patch id within the draw + 
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::TessPatchIdInVgt)); + ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 069e2908c..72263b3bf 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -95,6 +95,7 @@ public: void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 636a473d1..b0d7b8b72 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -251,6 +251,10 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 503144782..12edb28dc 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,12 @@ std::string NameOf(Attribute attribute) { 
return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; + case Attribute::PatchVertices: + return "PatchVertices"; case Attribute::TessellationEvaluationPointU: return "TessellationEvaluationPointU"; case Attribute::TessellationEvaluationPointV: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 9b68fd119..3d3e48923 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,10 +72,14 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 - TessellationEvaluationPointU = 80, - TessellationEvaluationPointV = 81, + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PackedHullInvocationInfo = + 81, // PrimitiveId (patch id) and InvocationId (output control point id) + PatchVertices = 82, + TessellationEvaluationPointU = 83, + TessellationEvaluationPointV = 84, Max, }; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index dda247050..25cb9b2b3 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -134,6 +134,10 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } +void IREmitter::TcsOutputBarrier() { + Inst(Opcode::TcsOutputBarrier); +} + U32 IREmitter::GetUserData(IR::ScalarReg reg) { ASSERT(static_cast(reg) < IR::NumScalarRegs); return Inst(Opcode::GetUserData, reg); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d13c6e935..00e81d65a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ 
-10,8 +10,8 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" -#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -50,6 +50,7 @@ public: void Barrier(); void WorkgroupMemoryBarrier(); void DeviceMemoryBarrier(); + void TcsOutputBarrier(); [[nodiscard]] U32 GetUserData(IR::ScalarReg reg); [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 7f36f44d7..9f3ccd52f 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -44,6 +44,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: + case Opcode::TcsOutputBarrier: case Opcode::ConditionRef: case Opcode::Reference: case Opcode::PhiMove: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 6004a03b5..2d63b6f20 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -24,6 +24,7 @@ OPCODE(ReadConstBuffer, U32, Opaq OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) +OPCODE(TcsOutputBarrier, Void, ) // Geometry shader specific OPCODE(EmitVertex, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index bd1094792..d27c75bd1 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off #include "shader_recompiler/ir/ir_emitter.h" #include 
"shader_recompiler/ir/program.h" @@ -60,9 +59,14 @@ namespace Shader::Optimization { * NOTE: This pass must be run before constant propagation as it relies on relatively specific * pattern matching that might be mutated that that optimization pass. * + * TODO: need to be careful about reading from output arrays at idx other than InvocationID + * Need SPIRV OpControlBarrier + * "Wait for all active invocations within the specified Scope to reach the current point of + * execution." + * Must be placed in uniform control flow */ -void HullShaderTransform(const IR::Program& program) { +void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) { LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program)); for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 3cb5e11a3..5228006ed 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -16,8 +16,7 @@ void FlattenExtendedUserdataPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); -void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, - Stage stage); -void HullShaderTransform(const IR::Program& program); +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); +void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index 345bdbf31..207d82e6f 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,18 +1,89 @@ // 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" + +namespace { + +// TODO clean this up. Maybe remove +// from https://github.com/chaotic-cx/mesa-mirror/blob/main/src/amd/compiler/README.md +// basically logical stage x hw stage permutations +enum class SwHwStagePerm { + vertex_vs, + fragment_fs, + vertex_ls, + tess_control_hs, + tess_eval_vs, + vertex_es, + geometry_gs, + gs_copy_vs, + tess_eval_es, + compute_cs, +}; + +static SwHwStagePerm GetSwHwStagePerm(Shader::Stage hw_stage, Shader::LogicalStage sw_stage) { + using namespace Shader; + switch (sw_stage) { + case LogicalStage::Fragment: + ASSERT(hw_stage == Stage::Fragment); + return SwHwStagePerm::fragment_fs; + case LogicalStage::Vertex: { + switch (hw_stage) { + case Stage::Vertex: + return SwHwStagePerm::vertex_vs; + case Stage::Export: + return SwHwStagePerm::vertex_es; + case Stage::Local: + return SwHwStagePerm::vertex_ls; + default: + UNREACHABLE(); + } + } break; + case LogicalStage::TessellationControl: + ASSERT(hw_stage == Stage::Hull); + return SwHwStagePerm::tess_control_hs; + case LogicalStage::TessellationEval: { + switch (hw_stage) { + case Stage::Vertex: + return SwHwStagePerm::tess_eval_vs; + case Stage::Export: + return SwHwStagePerm::tess_eval_es; + default: + UNREACHABLE(); + } + } + case LogicalStage::Geometry: + ASSERT(hw_stage == Stage::Geometry); + return SwHwStagePerm::geometry_gs; + case LogicalStage::GsCopy: + ASSERT(hw_stage == Stage::Vertex); + return SwHwStagePerm::gs_copy_vs; + case LogicalStage::Compute: + ASSERT(hw_stage == Stage::Compute); + return SwHwStagePerm::compute_cs; + default: + UNREACHABLE(); 
+ } +} + +}; // namespace namespace Shader::Optimization { -void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, - Stage stage) { +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info) { auto& info = program.info; + Stage stage = info.stage; + LogicalStage l_stage = info.l_stage; + SwHwStagePerm stage_perm = GetSwHwStagePerm(stage, l_stage); + const auto& ForEachInstruction = [&](auto func) { for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -22,38 +93,40 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim } }; - switch (stage) { - case Stage::Local: { + switch (stage_perm) { + case SwHwStagePerm::vertex_ls: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { - case IR::Opcode::WriteSharedU64: { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + u32 offset = 0; const auto* addr = inst.Arg(0).InstRecursive(); if (addr->GetOpcode() == IR::Opcode::IAdd32) { ASSERT(addr->Arg(1).IsImmediate()); offset = addr->Arg(1).U32(); } - const IR::Inst* pair = inst.Arg(1).InstRecursive(); - for (s32 i = 0; i < 2; i++) { + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { const auto attrib = IR::Attribute::Param0 + (offset / 16); const auto comp = (offset / 4) % 4; - const IR::U32 value = IR::U32{pair->Arg(i)}; + const IR::U32 value = IR::U32{is_composite ? 
data.Inst()->Arg(i) : data}; ir.SetAttribute(attrib, ir.BitCast(value), comp); offset += 4; } inst.Invalidate(); break; } - case IR::Opcode::WriteSharedU32: - UNREACHABLE(); default: break; } }); break; } - case Stage::Export: { + case SwHwStagePerm::vertex_es: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { @@ -84,7 +157,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }); break; } - case Stage::Geometry: { + case SwHwStagePerm::geometry_gs: { const auto& gs_info = runtime_info.gs_info; info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy); @@ -112,8 +185,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - const auto info = inst.Flags(); - if (!info.system_coherent || !info.globally_coherent) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index aee69f73b..399b08a2a 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -61,12 +64,45 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info // Run optimization passes const auto stage = program.info.stage; + + bool dump_ir = true; + bool extra_id_removal = true; // TODO remove all this stuff + auto dumpMatchingIR = [&](std::string phase) { + if (dump_ir) { + if (Config::dumpShaders()) { + std::string s = IR::DumpProgram(program); 
+ using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = + fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteString(s); + } + } + }; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } if (stage == Stage::Hull) { - Shader::Optimization::HullShaderTransform(program); + dumpMatchingIR("pre_hull"); + Shader::Optimization::HullShaderTransform(program, runtime_info); + dumpMatchingIR("post_hull"); } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } + dumpMatchingIR("pre_ring"); + Shader::Optimization::RingAccessElimination(program, runtime_info); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } + dumpMatchingIR("post_ring"); if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 32b4f3ed9..808e734ac 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -29,6 +29,7 @@ enum class LogicalStage : u32 { TessellationControl, TessellationEval, Geometry, + GsCopy, Compute, }; @@ -86,8 +87,18 @@ struct VertexRuntimeInfo { struct HullRuntimeInfo { u32 output_control_points; + // trying to debug TODO probably delete this + u32 input_control_points; + u32 num_patches; + u32 num_instances; + u64 tess_factor_memory_base; + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + 
AmdGpu::TessellationPartitioning tess_partitioning; - auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + bool operator==(const HullRuntimeInfo& other) const noexcept { + return output_control_points == other.output_control_points; + } }; static constexpr auto GsMaxOutputStreams = 4u; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 16f22ec13..f9dbf71d3 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -591,6 +591,16 @@ struct Liverpool { BitField<2, 2, IndexSwapMode> swap_mode; }; + union MultiVgtParam { + u32 raw; + BitField<0, 16, u32> primgroup_size; + BitField<16, 1, u32> partial_vs_wave_on; + BitField<17, 1, u32> switch_on_eop; + BitField<18, 1, u32> partial_es_wave_on; + BitField<19, 1, u32> switch_on_eoi; + BitField<20, 1, u32> wd_switch_on_eop; + }; + union VgtNumInstances { u32 num_instances; @@ -1074,6 +1084,17 @@ struct Liverpool { BitField<5, 3, TessellationTopology> topology; }; + union TessFactorMemoryBase { + // TODO: was going to use this to check against UD used in tcs shader + // but only seen set to 0 + // Remove this and other added regs if they end up unused + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1208,7 +1229,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1); + MultiVgtParam ia_multi_vgt_param; u32 vgt_esgs_ring_itemsize; u32 vgt_gsvs_ring_itemsize; INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1); @@ -1232,6 +1253,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1456,6 +1479,7 @@ static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); 
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(ia_multi_vgt_param) == 0xA2AA); static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); @@ -1473,6 +1497,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); +static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 4bffb9ce8..fa8491665 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -27,6 +29,19 @@ enum class TessellationType : u32 { Quad = 2, }; +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + enum class TessellationPartitioning : u32 { Integer = 0, Pow2 = 1, @@ -34,6 +49,21 @@ enum class TessellationPartitioning : u32 { FracEven = 3, }; +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} 
+ enum class TessellationTopology : u32 { Point = 0, Line = 1, @@ -41,6 +71,21 @@ enum class TessellationTopology : u32 { TriangleCcw = 3, }; +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -138,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 98c283fb8..4904b9d1c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -106,6 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than 
-1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + // TODO how to handle optional member of graphics key when dynamic state not supported? + //.patchControlPoints = key. + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, @@ -168,6 +173,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } else { dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT); } + ASSERT(instance.IsPatchControlPointsDynamicState()); // TODO remove + if (instance.IsPatchControlPointsDynamicState()) { + dynamic_states.push_back(vk::DynamicState::ePatchControlPointsEXT); + } const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast(dynamic_states.size()), @@ -317,6 +326,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + !instance.IsPatchControlPointsDynamicState() ? 
&tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb60..009e9a42e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -258,6 +258,7 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + extended_dynamic_state_2 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9cc..844e1e6c0 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -133,6 +133,10 @@ public: return vertex_input_dynamic_state; } + bool IsPatchControlPointsDynamicState() const { + return extended_dynamic_state_2; + } + /// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported. 
bool IsNullDescriptorSupported() const { return null_descriptor; @@ -333,6 +337,7 @@ private: bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool extended_dynamic_state_2{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87f13010d..c1d937059 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -98,6 +98,14 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ case Stage::Hull: { BuildCommon(regs.hs_program); info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.input_control_points = regs.ls_hs_config.hs_input_control_points; + info.hs_info.num_patches = regs.ls_hs_config.num_patches; + // Suspicious about this in apparently "passthrough" hull shader. Probably not relevant + info.hs_info.num_instances = regs.num_instances.NumInstances(); + info.hs_info.tess_factor_memory_base = regs.vgt_tf_memory_base.MemoryBase(); + info.hs_info.tess_type = regs.tess_config.type; + info.hs_info.tess_topology = regs.tess_config.topology; + info.hs_info.tess_partitioning = regs.tess_config.partitioning; break; } case Stage::Export: { @@ -236,6 +244,27 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { return it->second.get(); } +bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { + static const std::vector<u64> skip_hashes = { + 0xbc234799 /* passthrough */, + 0x8453cd1c /* passthrough */, + 0xd67db0ef /* passthrough */, + 0x34121ac6 /* passthrough*/, + 0xa26750c1 /* passthrough, warp */, + 0xbb88db5f /* passthrough */, + 0x90c6fb05 /* passthrough */, + 0x9fd272d7 /* forbidden woods (not PS) */, + 0x2807dd6c /* forbidden woods, down elevator (not PS) */, + 0x627ac5b9 /* ayyylmao*, passthrough */, + 0xb5fb5174 /* rom (not PS) */, + }; + if (std::ranges::contains(skip_hashes, 
shader_hash)) { + LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); + return true; + } + return false; +} + bool PipelineCache::RefreshGraphicsKey() { std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey)); @@ -344,6 +373,10 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } + if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { + return false; + } + auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, @@ -453,7 +486,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index eb2ef3600..9e7a333de 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -214,7 +215,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -271,7 +272,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& 
fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -932,6 +933,11 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask); } } + if (instance.IsPatchControlPointsDynamicState()) { + if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { + cmdbuf.setPatchControlPointsEXT(regs.ls_hs_config.hs_input_control_points); + } + } } void Rasterizer::UpdateViewportScissorState() {