diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 7a4048bae..611225e8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { void EmitBarrier(EmitContext& ctx) { const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Workgroup}; - const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | - spv::MemorySemanticsMask::WorkgroupMemory}; + spv::Scope memory; + spv::MemorySemanticsMask memory_semantics; + if (ctx.l_stage == Shader::LogicalStage::TessellationControl) { + memory = spv::Scope::Invocation; + memory_semantics = spv::MemorySemanticsMask::MaskNone; + } else { + memory = spv::Scope::Workgroup; + memory_semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory; + } ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)), ctx.ConstU32(static_cast<u32>(memory)), ctx.ConstU32(static_cast<u32>(memory_semantics))); @@ -34,13 +41,4 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { MemoryBarrier(ctx, spv::Scope::Device); } -void EmitTcsOutputBarrier(EmitContext& ctx) { - const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Invocation}; - const auto memory_semantics{spv::MemorySemanticsMask::MaskNone}; - ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)), - ctx.ConstU32(static_cast<u32>(memory)), - ctx.ConstU32(static_cast<u32>(memory_semantics))); -} - } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ffaf2a637..f3db6af56 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -207,7 +207,7 @@ 
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, I } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { - if (ctx.info.stage == Stage::Geometry) { + if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); } else if (ctx.info.l_stage == LogicalStage::TessellationControl || ctx.info.l_stage == LogicalStage::TessellationEval) { @@ -363,7 +363,8 @@ void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id co Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { const u32 index{IR::GenericPatchIndex(patch)}; const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; - const Id type{ctx.stage == Stage::Hull ? ctx.output_f32 : ctx.input_f32}; + const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32 + : ctx.input_f32}; const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; return ctx.OpLoad(ctx.F32[1], pointer); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 980b8870e..ab7e396dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -51,7 +51,6 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitTcsOutputBarrier(EmitContext& ctx); Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); void EmitGetThreadBitScalarReg(EmitContext& ctx); void EmitSetThreadBitScalarReg(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp index fe9a5c8e8..ef8bab789 100644 --- a/src/shader_recompiler/frontend/translate/scalar_flow.cpp +++ 
b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -35,12 +35,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { } void Translator::S_BARRIER() { - if (info.l_stage == LogicalStage::TessellationControl) { - // TODO: ASSERT that we're in uniform control flow - ir.TcsOutputBarrier(); - } else { - ir.Barrier(); - } + ir.Barrier(); } void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index b216d325d..fc74fd0f5 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -134,10 +134,6 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } -void IREmitter::TcsOutputBarrier() { - Inst(Opcode::TcsOutputBarrier); -} - U32 IREmitter::GetUserData(IR::ScalarReg reg) { ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs); return Inst<U32>(Opcode::GetUserData, reg); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 3bd6ef1ec..859ee8a7d 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -50,7 +50,6 @@ public: void Barrier(); void WorkgroupMemoryBarrier(); void DeviceMemoryBarrier(); - void TcsOutputBarrier(); [[nodiscard]] U32 GetUserData(IR::ScalarReg reg); [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 21b9f048e..6e7bbe661 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -44,7 +44,6 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: - case Opcode::TcsOutputBarrier: case Opcode::ConditionRef: case Opcode::Reference: case Opcode::PhiMove: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc 
index ad2a23f76..71c686054 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -24,7 +24,6 @@ OPCODE(ReadConstBuffer, U32, Opaq OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) -OPCODE(TcsOutputBarrier, Void, ) // Geometry shader specific OPCODE(EmitVertex, Void, ) diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b95503357..00e4ef83a 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -65,38 +65,18 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info // Run optimization passes const auto stage = program.info.stage; - auto dumpMatchingIR = [&](std::string phase) { - if (Config::dumpShaders()) { - std::string s = IR::DumpProgram(program); - using namespace Common::FS; - const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; - if (!std::filesystem::exists(dump_dir)) { - std::filesystem::create_directories(dump_dir); - } - const auto filename = - fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteString(s); - } - }; - - dumpMatchingIR("init"); - Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::ConstantPropagationPass( program.post_order_blocks); // TODO const fold spam for now while testing - if (stage == Stage::Hull) { + if (info.l_stage == LogicalStage::TessellationControl) { Shader::Optimization::TessellationPreprocess(program, runtime_info); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - dumpMatchingIR("pre_hull"); Shader::Optimization::HullShaderTransform(program, runtime_info); - dumpMatchingIR("post_hull"); } else if (info.l_stage == LogicalStage::TessellationEval) { Shader::Optimization::TessellationPreprocess(program, 
runtime_info); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::DomainShaderTransform(program, runtime_info); - dumpMatchingIR("post_domain"); } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::RingAccessElimination(program, runtime_info, stage); @@ -109,7 +89,6 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); - dumpMatchingIR("final"); return program; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index ea1e1faa9..7addfb2ef 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -22,7 +22,6 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 7; // Vertex intentionally comes after TCS/TES due to order of compilation enum class LogicalStage : u32 { @@ -31,11 +30,12 @@ TessellationEval, Vertex, Geometry, - GsCopy, // TODO delete this, but causes crash somehow (probably wrong use of Shader::Stage - // somewhere) Compute, + NumLogicalStages }; +constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages); + [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast<Stage>(index); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 8acce8539..b6172d37b 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -598,16 +598,6 @@ struct Liverpool { BitField<2, 2, IndexSwapMode> swap_mode; }; - union MultiVgtParam { - u32 raw; - BitField<0, 16, u32> primgroup_size; - BitField<16, 1, u32> partial_vs_wave_on; - BitField<17, 1, u32> switch_on_eop; - BitField<18, 1, u32> partial_es_wave_on; - BitField<19, 1, u32> switch_on_eoi; - BitField<20, 1, u32> 
wd_switch_on_eop; - }; - union VgtNumInstances { u32 num_instances; @@ -982,7 +972,7 @@ struct Liverpool { BitField<3, 2, u32> es_en; BitField<5, 1, u32> gs_en; BitField<6, 2, u32> vs_en; - BitField<8, 24, u32> dynamic_hs; // TODO testing + BitField<8, 1, u32> dynamic_hs; bool IsStageEnabled(u32 stage) const { switch (stage) { @@ -1236,7 +1226,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - MultiVgtParam ia_multi_vgt_param; + INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1); u32 vgt_esgs_ring_itemsize; u32 vgt_gsvs_ring_itemsize; INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1); @@ -1487,7 +1477,6 @@ static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); -static_assert(GFX6_3D_REG_INDEX(ia_multi_vgt_param) == 0xA2AA); static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 830f8ede9..14e78e410 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -20,8 +20,6 @@ namespace Vulkan { -using Shader::LogicalStage; // TODO - GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, @@ -33,7 +31,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); - const bool uses_tessellation 
= stages[u32(LogicalStage::TessellationControl)]; + const bool uses_tessellation = stages[u32(Shader::LogicalStage::TessellationControl)]; const vk::PushConstantRange push_constants = { .stageFlags = gp_stage_flags, @@ -215,7 +213,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages> shader_stages; - auto stage = u32(LogicalStage::Vertex); + auto stage = u32(Shader::LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -223,7 +221,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(LogicalStage::Geometry); + stage = u32(Shader::LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -231,7 +229,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(LogicalStage::TessellationControl); + stage = u32(Shader::LogicalStage::TessellationControl); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationControl, @@ -239,7 +237,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(LogicalStage::TessellationEval); + stage = u32(Shader::LogicalStage::TessellationEval); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, @@ -247,7 +245,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(LogicalStage::Fragment); + stage = u32(Shader::LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, diff 
--git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 61ae318b6..1b13a1797 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -38,6 +38,7 @@ public: } auto GetStages() const { + static_assert(static_cast<u32>(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1); if (is_compute) { return std::span{stages.cend() - 1, stages.cend()}; } else {