From 6a4cf2763a399267a50c20a85e917b5a25c0449b Mon Sep 17 00:00:00 2001 From: Frodo Baggins Date: Wed, 16 Oct 2024 09:51:45 -0700 Subject: [PATCH] fix compiler errors after merge DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff --- src/common/logging/backend.cpp | 3 +- .../backend/spirv/emit_spirv.cpp | 5 +- .../backend/spirv/emit_spirv_barriers.cpp | 9 ++ .../spirv/emit_spirv_context_get_set.cpp | 18 +++- .../backend/spirv/emit_spirv_instructions.h | 3 +- .../backend/spirv/spirv_emit_context.cpp | 7 ++ .../backend/spirv/spirv_emit_context.h | 2 + .../frontend/translate/data_share.cpp | 8 +- .../frontend/translate/scalar_alu.cpp | 22 ++++ .../frontend/translate/translate.cpp | 23 +++- .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_memory.cpp | 4 + src/shader_recompiler/ir/attribute.cpp | 8 ++ src/shader_recompiler/ir/attribute.h | 12 ++- src/shader_recompiler/ir/ir_emitter.cpp | 4 + src/shader_recompiler/ir/ir_emitter.h | 3 +- src/shader_recompiler/ir/microinstruction.cpp | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + .../ir/passes/hull_shader_transform.cpp | 8 +- src/shader_recompiler/ir/passes/ir_passes.h | 5 +- .../ir/passes/ring_access_elimination.cpp | 101 +++++++++++++++--- src/shader_recompiler/recompiler.cpp | 40 ++++++- src/shader_recompiler/runtime_info.h | 13 ++- src/video_core/amdgpu/liverpool.h | 27 ++++- src/video_core/amdgpu/types.h | 75 +++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 11 ++ .../renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 5 + .../renderer_vulkan/vk_pipeline_cache.cpp | 35 +++++- .../renderer_vulkan/vk_rasterizer.cpp | 10 +- 30 files changed, 422 insertions(+), 43 deletions(-) diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 7802977f5..082ce4221 100644 --- 
a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -62,7 +62,8 @@ private: class FileBackend { public: explicit FileBackend(const std::filesystem::path& filename) - : file{filename, FS::FileAccessMode::Write, FS::FileType::TextFile} {} + : file{std::filesystem::path("/dev/null"), FS::FileAccessMode::Write, + FS::FileType::TextFile} {} ~FileBackend() = default; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9548cd5b0..39e5169d4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/runtime_info.h" #pragma clang optimize off #include #include @@ -285,6 +286,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { @@ -309,7 +313,6 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { break; case LogicalStage::TessellationEval: { execution_model = spv::ExecutionModel::TessellationEvaluation; - ctx.AddCapability(spv::Capability::Tessellation); const auto& vs_info = ctx.runtime_info.vs_info; ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 22b3523aa..7a4048bae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -34,4 +34,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { MemoryBarrier(ctx, spv::Scope::Device); } +void EmitTcsOutputBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Invocation}; + const auto memory_semantics{spv::MemorySemanticsMask::MaskNone}; + ctx.OpControlBarrier(ctx.ConstU32(static_cast(execution)), + ctx.ConstU32(static_cast(memory)), + ctx.ConstU32(static_cast(memory_semantics))); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index dde7b4806..f1e173371 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/runtime_info.h" +#pragma clang optimize off #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/ir/patch.h" @@ -273,8 +276,21 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case 
IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: + // TODO figure out what to do with this + // should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId + return ctx.u32_zero_value; default: UNREACHABLE_MSG("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 27f8e5a91..29ffb916a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -28,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -53,6 +51,7 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitTcsOutputBarrier(EmitContext& ctx); Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); void EmitGetThreadBitScalarReg(EmitContext& ctx); void EmitSetThreadBitScalarReg(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 874081fc9..81376c4f0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -386,6 +386,13 @@ void EmitContext::DefineInputs() { } break; } + case LogicalStage::TessellationControl: { // InvocationId is a scalar, loaded as U32[1] + invocation_id = + DefineVariable(U32[1], 
spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + break; + } case LogicalStage::TessellationEval: { tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index ea2ca725f..497aa1d0f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -193,6 +193,7 @@ public: Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; Id output_tess_level_outer{}; Id output_tess_level_inner{}; Id tess_coord; @@ -200,6 +201,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index be5bf273e..2c3cd167f 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/runtime_info.h" #pragma clang optimize off #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" @@ -73,10 +74,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git 
a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83e..549464580 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,6 +80,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE_I32(inst); case Opcode::S_BFE_U32: return S_BFE_U32(inst); case Opcode::S_ABSDIFF_I32: @@ -444,6 +448,24 @@ void Translator::S_BFE_U32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_BFE_I32(const GcnInst& inst) { + // Signed bit-field extract. Per the GCN ISA, S1 is not a mask: it packs the field + // offset in bits [4:0] and the field width in bits [22:16]. + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + + // Decode offset/width with unsigned extracts so a non-immediate S1 also works. + const IR::U32 offset{ir.BitFieldExtract(src1, ir.Imm32(0), ir.Imm32(5), false)}; + const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7), false)}; + + // `true` is presumably is_signed: sign-extend from the field's top bit (confirm). + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, true)}; + SetDst(inst.dst[0], result); + + // SCC reports whether the extracted value is non-zero. + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_ABSDIFF_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index d9e92cb78..84f79bb5f 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include 
"shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -51,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -121,15 +123,28 @@ void Translator::EmitPrologue() { } } break; - case LogicalStage::TessellationControl: - ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + case LogicalStage::TessellationControl: { + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // Test + // ir.SetPatch(IR::Patch::TessellationLodLeft, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodTop, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodRight, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodBottom, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodInteriorU, ir.Imm32(1.0f)); + // ir.SetPatch(IR::Patch::TessellationLodInteriorV, ir.Imm32(1.0f)); break; + } case LogicalStage::TessellationEval: ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); - ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + // I think V2 is actually the patch id within the patches running on the local CU, used in + // compiler generated address calcs, + // and V3 is the patch id within the draw + ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::TessPatchIdInVgt)); + 
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 069e2908c..72263b3bf 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -95,6 +95,7 @@ public: void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 636a473d1..b0d7b8b72 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -251,6 +251,10 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 503144782..12edb28dc 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,12 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; 
+ case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; + case Attribute::PatchVertices: + return "PatchVertices"; case Attribute::TessellationEvaluationPointU: return "TessellationEvaluationPointU"; case Attribute::TessellationEvaluationPointV: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 9b68fd119..3d3e48923 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,10 +72,14 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 - TessellationEvaluationPointU = 80, - TessellationEvaluationPointV = 81, + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PackedHullInvocationInfo = + 81, // PrimitiveId (patch id) and InvocationId (output control point id) + PatchVertices = 82, + TessellationEvaluationPointU = 83, + TessellationEvaluationPointV = 84, Max, }; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index dda247050..25cb9b2b3 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -134,6 +134,10 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } +void IREmitter::TcsOutputBarrier() { + Inst(Opcode::TcsOutputBarrier); +} + U32 IREmitter::GetUserData(IR::ScalarReg reg) { ASSERT(static_cast(reg) < IR::NumScalarRegs); return Inst(Opcode::GetUserData, reg); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d13c6e935..00e81d65a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -10,8 +10,8 @@ #include "shader_recompiler/ir/attribute.h" #include 
"shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" -#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -50,6 +50,7 @@ public: void Barrier(); void WorkgroupMemoryBarrier(); void DeviceMemoryBarrier(); + void TcsOutputBarrier(); [[nodiscard]] U32 GetUserData(IR::ScalarReg reg); [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 7f36f44d7..9f3ccd52f 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -44,6 +44,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: + case Opcode::TcsOutputBarrier: case Opcode::ConditionRef: case Opcode::Reference: case Opcode::PhiMove: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 6004a03b5..2d63b6f20 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -24,6 +24,7 @@ OPCODE(ReadConstBuffer, U32, Opaq OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) +OPCODE(TcsOutputBarrier, Void, ) // Geometry shader specific OPCODE(EmitVertex, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index bd1094792..d27c75bd1 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" @@ -60,9 +59,14 @@ namespace 
Shader::Optimization { * NOTE: This pass must be run before constant propagation as it relies on relatively specific * pattern matching that might be mutated that that optimization pass. * + * TODO: need to be careful about reading from output arrays at idx other than InvocationID + * Need SPIRV OpControlBarrier + * "Wait for all active invocations within the specified Scope to reach the current point of + * execution." + * Must be placed in uniform control flow */ -void HullShaderTransform(const IR::Program& program) { +void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) { LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program)); for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 3cb5e11a3..5228006ed 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -16,8 +16,7 @@ void FlattenExtendedUserdataPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); -void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, - Stage stage); -void HullShaderTransform(const IR::Program& program); +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); +void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index 345bdbf31..207d82e6f 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,18 +1,89 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // 
SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" + +namespace { + +// TODO clean this up. Maybe remove +// from https://github.com/chaotic-cx/mesa-mirror/blob/main/src/amd/compiler/README.md +// basically logical stage x hw stage permutations +enum class SwHwStagePerm { + vertex_vs, + fragment_fs, + vertex_ls, + tess_control_hs, + tess_eval_vs, + vertex_es, + geometry_gs, + gs_copy_vs, + tess_eval_es, + compute_cs, +}; + +static SwHwStagePerm GetSwHwStagePerm(Shader::Stage hw_stage, Shader::LogicalStage sw_stage) { + using namespace Shader; + switch (sw_stage) { + case LogicalStage::Fragment: + ASSERT(hw_stage == Stage::Fragment); + return SwHwStagePerm::fragment_fs; + case LogicalStage::Vertex: { + switch (hw_stage) { + case Stage::Vertex: + return SwHwStagePerm::vertex_vs; + case Stage::Export: + return SwHwStagePerm::vertex_es; + case Stage::Local: + return SwHwStagePerm::vertex_ls; + default: + UNREACHABLE(); + } + } break; + case LogicalStage::TessellationControl: + ASSERT(hw_stage == Stage::Hull); + return SwHwStagePerm::tess_control_hs; + case LogicalStage::TessellationEval: { + switch (hw_stage) { + case Stage::Vertex: + return SwHwStagePerm::tess_eval_vs; + case Stage::Export: + return SwHwStagePerm::tess_eval_es; + default: + UNREACHABLE(); + } + } + case LogicalStage::Geometry: + ASSERT(hw_stage == Stage::Geometry); + return SwHwStagePerm::geometry_gs; + case LogicalStage::GsCopy: + ASSERT(hw_stage == Stage::Vertex); + return SwHwStagePerm::gs_copy_vs; + case LogicalStage::Compute: + ASSERT(hw_stage == Stage::Compute); + return SwHwStagePerm::compute_cs; + default: + UNREACHABLE(); + } +} + +}; // namespace namespace Shader::Optimization { -void 
RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, - Stage stage) { +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info) { auto& info = program.info; + Stage stage = info.stage; + LogicalStage l_stage = info.l_stage; + SwHwStagePerm stage_perm = GetSwHwStagePerm(stage, l_stage); + const auto& ForEachInstruction = [&](auto func) { for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -22,38 +93,40 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim } }; - switch (stage) { - case Stage::Local: { + switch (stage_perm) { + case SwHwStagePerm::vertex_ls: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { - case IR::Opcode::WriteSharedU64: { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + u32 offset = 0; const auto* addr = inst.Arg(0).InstRecursive(); if (addr->GetOpcode() == IR::Opcode::IAdd32) { ASSERT(addr->Arg(1).IsImmediate()); offset = addr->Arg(1).U32(); } - const IR::Inst* pair = inst.Arg(1).InstRecursive(); - for (s32 i = 0; i < 2; i++) { + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { const auto attrib = IR::Attribute::Param0 + (offset / 16); const auto comp = (offset / 4) % 4; - const IR::U32 value = IR::U32{pair->Arg(i)}; + const IR::U32 value = IR::U32{is_composite ? 
data.Inst()->Arg(i) : data}; ir.SetAttribute(attrib, ir.BitCast(value), comp); offset += 4; } inst.Invalidate(); break; } - case IR::Opcode::WriteSharedU32: - UNREACHABLE(); default: break; } }); break; } - case Stage::Export: { + case SwHwStagePerm::vertex_es: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { @@ -84,7 +157,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }); break; } - case Stage::Geometry: { + case SwHwStagePerm::geometry_gs: { const auto& gs_info = runtime_info.gs_info; info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy); @@ -112,8 +185,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - const auto info = inst.Flags(); - if (!info.system_coherent || !info.globally_coherent) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index aee69f73b..399b08a2a 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -61,12 +64,45 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info // Run optimization passes const auto stage = program.info.stage; + + bool dump_ir = true; + bool extra_id_removal = true; // TODO remove all this stuff + auto dumpMatchingIR = [&](std::string phase) { + if (dump_ir) { + if (Config::dumpShaders()) { + std::string s = IR::DumpProgram(program); 
+ using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = + fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteString(s); + } + } + }; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } if (stage == Stage::Hull) { - Shader::Optimization::HullShaderTransform(program); + dumpMatchingIR("pre_hull"); + Shader::Optimization::HullShaderTransform(program, runtime_info); + dumpMatchingIR("post_hull"); } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } + dumpMatchingIR("pre_ring"); + Shader::Optimization::RingAccessElimination(program, runtime_info); + if (extra_id_removal) { + Shader::Optimization::IdentityRemovalPass(program.blocks); + } + dumpMatchingIR("post_ring"); if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 32b4f3ed9..808e734ac 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -29,6 +29,7 @@ enum class LogicalStage : u32 { TessellationControl, TessellationEval, Geometry, + GsCopy, Compute, }; @@ -86,8 +87,18 @@ struct VertexRuntimeInfo { struct HullRuntimeInfo { u32 output_control_points; + // trying to debug TODO probably delete this + u32 input_control_points; + u32 num_patches; + u32 num_instances; + u64 tess_factor_memory_base; + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + 
AmdGpu::TessellationPartitioning tess_partitioning; - auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + bool operator==(const HullRuntimeInfo& other) const noexcept { + return output_control_points == other.output_control_points; + } }; static constexpr auto GsMaxOutputStreams = 4u; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 16f22ec13..f9dbf71d3 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -591,6 +591,16 @@ struct Liverpool { BitField<2, 2, IndexSwapMode> swap_mode; }; + union MultiVgtParam { + u32 raw; + BitField<0, 16, u32> primgroup_size; + BitField<16, 1, u32> partial_vs_wave_on; + BitField<17, 1, u32> switch_on_eop; + BitField<18, 1, u32> partial_es_wave_on; + BitField<19, 1, u32> switch_on_eoi; + BitField<20, 1, u32> wd_switch_on_eop; + }; + union VgtNumInstances { u32 num_instances; @@ -1074,6 +1084,17 @@ struct Liverpool { BitField<5, 3, TessellationTopology> topology; }; + union TessFactorMemoryBase { + // TODO: was going to use this to check against UD used in tcs shader + // but only seen set to 0 + // Remove this and other added regs if they end up unused + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1208,7 +1229,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1); + MultiVgtParam ia_multi_vgt_param; u32 vgt_esgs_ring_itemsize; u32 vgt_gsvs_ring_itemsize; INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1); @@ -1232,6 +1253,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1456,6 +1479,7 @@ static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); 
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(ia_multi_vgt_param) == 0xA2AA); static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); @@ -1473,6 +1497,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); +static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 4bffb9ce8..fa8491665 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -27,6 +29,19 @@ enum class TessellationType : u32 { Quad = 2, }; +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + enum class TessellationPartitioning : u32 { Integer = 0, Pow2 = 1, @@ -34,6 +49,21 @@ enum class TessellationPartitioning : u32 { FracEven = 3, }; +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} 
+ enum class TessellationTopology : u32 { Point = 0, Line = 1, @@ -41,6 +71,21 @@ enum class TessellationTopology : u32 { TriangleCcw = 3, }; +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -138,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + +template <> +struct fmt::formatter<AmdGpu::TessellationType> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter<AmdGpu::TessellationPartitioning> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter<AmdGpu::TessellationTopology> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 98c283fb8..4904b9d1c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -106,6 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than 
-1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + // TODO how to handle optional member of graphics key when dynamic state not supported? + //.patchControlPoints = key. + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, @@ -168,6 +173,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } else { dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT); } + ASSERT(instance.IsPatchControlPointsDynamicState()); // TODO remove + if (instance.IsPatchControlPointsDynamicState()) { + dynamic_states.push_back(vk::DynamicState::ePatchControlPointsEXT); + } const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast<u32>(dynamic_states.size()), @@ -317,6 +326,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + !instance.IsPatchControlPointsDynamicState() ? 
&tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb60..009e9a42e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -258,6 +258,7 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + extended_dynamic_state_2 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9cc..844e1e6c0 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -133,6 +133,10 @@ public: return vertex_input_dynamic_state; } + bool IsPatchControlPointsDynamicState() const { + return extended_dynamic_state_2; + } + /// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported. 
bool IsNullDescriptorSupported() const { return null_descriptor; @@ -333,6 +337,7 @@ private: bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool extended_dynamic_state_2{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87f13010d..c1d937059 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -98,6 +98,14 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ case Stage::Hull: { BuildCommon(regs.hs_program); info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.input_control_points = regs.ls_hs_config.hs_input_control_points; + info.hs_info.num_patches = regs.ls_hs_config.num_patches; + // Suspicious about this in apparently "passthrough" hull shader. Probably not relevant. + info.hs_info.num_instances = regs.num_instances.NumInstances(); + info.hs_info.tess_factor_memory_base = regs.vgt_tf_memory_base.MemoryBase(); + info.hs_info.tess_type = regs.tess_config.type; + info.hs_info.tess_topology = regs.tess_config.topology; + info.hs_info.tess_partitioning = regs.tess_config.partitioning; break; } case Stage::Export: { @@ -236,6 +244,27 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { return it->second.get(); } +bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { + static std::vector<u64> skip_hashes = { + 0xbc234799 /* passthrough */, + 0x8453cd1c /* passthrough */, + 0xd67db0ef /* passthrough */, + 0x34121ac6 /* passthrough*/, + 0xa26750c1 /* passthrough, warp */, + 0xbb88db5f /* passthrough */, + 0x90c6fb05 /* passthrough */, + 0x9fd272d7 /* forbidden woods (not PS) */, + 0x2807dd6c /* forbidden woods, down elevator (not PS) */, + 0x627ac5b9 /* ayyylmao*, passthrough */, + 0xb5fb5174 /* rom (not PS) */, + }; + if (std::ranges::contains(skip_hashes, 
shader_hash)) { + LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); + return true; + } + return false; +} + bool PipelineCache::RefreshGraphicsKey() { std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey)); @@ -344,6 +373,10 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } + if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { + return false; + } + auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, @@ -453,7 +486,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} // namespace Vulkan bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index eb2ef3600..9e7a333de 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -214,7 +215,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -271,7 +272,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& 
fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -932,6 +933,11 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask); } } + if (instance.IsPatchControlPointsDynamicState()) { + if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { + cmdbuf.setPatchControlPointsEXT(regs.ls_hs_config.hs_input_control_points); + } + } } void Rasterizer::UpdateViewportScissorState() {