From 5e35a306072116b6846b2416364e72bc6eb1c34b Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:42:14 +0300 Subject: [PATCH] vector_alu: Proper V_MBCNT_U32_B32 --- .../backend/spirv/emit_spirv_instructions.h | 1 + .../backend/spirv/emit_spirv_warp.cpp | 4 +++ .../backend/spirv/spirv_emit_context.cpp | 11 ++++--- .../backend/spirv/spirv_emit_context.h | 1 + .../frontend/structured_control_flow.cpp | 13 ++++---- .../frontend/structured_control_flow.h | 3 +- .../frontend/translate/translate.cpp | 9 +++--- .../frontend/translate/translate.h | 8 +++-- .../frontend/translate/vector_alu.cpp | 31 +++++++++++++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 4 +++ src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/recompiler.cpp | 4 +-- src/shader_recompiler/recompiler.h | 5 ++- .../renderer_vulkan/vk_instance.cpp | 5 ++- src/video_core/renderer_vulkan/vk_instance.h | 6 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 7 +++-- 18 files changed, 91 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index b1ab1ad8b..51899eb4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -390,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitLaneId(EmitContext& ctx); +Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index a17515887..bd4ac0668 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) { return ctx.ConstU32(static_cast(spv::Scope::Subgroup)); } +Id EmitWarpId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id); +} + Id EmitLaneId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 6d8de362c..32d511c32 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -127,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::SnormNz: return ctx.F32[4]; case AmdGpu::NumberFormat::Sint: return ctx.S32[4]; @@ -147,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::SnormNz: return {id, input_f32, F32[1], 4}; case AmdGpu::NumberFormat::Uint: return {id, input_u32, U32[1], 4}; @@ -223,11 +225,10 @@ void EmitContext::DefineInputs(const Info& info) { break; } case Stage::Fragment: - if (info.uses_group_quad) { - subgroup_local_invocation_id = DefineVariable( - U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); - Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); - } + subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input); + subgroup_local_invocation_id = DefineVariable( + U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); + Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 2aa1bf780..34c13d3f9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -180,6 +180,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id subgroup_id{}; Id subgroup_local_invocation_id{}; Id image_u32{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 346f00aa4..798a52431 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -600,13 +600,13 @@ public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Info& info_) + Info& info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - Translator{&first_block, info}.EmitPrologue(); + Translator{&first_block, info, profile}.EmitPrologue(); } private: @@ -635,7 +635,7 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), - info); + info, profile); } break; } @@ -815,16 +815,17 @@ private: const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; + const Profile& profile; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Info& info) { + CFG& cfg, Info& info, const Profile& profile) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info, profile}; ASSERT_MSG(!info.translation_failed, "Shader translation has failed"); return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index 09814349c..f58ae9c1a 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -11,12 +11,13 @@ namespace Shader { struct Info; +struct Profile; } namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, CFG& cfg, - Info& info); + Info& info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 5988683bc..25cc55863 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -16,8 +16,8 @@ namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Info& info_) - : ir{*block_, block_->begin()}, info{info_} {} +Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_) + : ir{*block_, block_->begin()}, info{info_}, profile{profile_} {} void Translator::EmitPrologue() { ir.Prologue(); @@ -487,11 +487,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { info.translation_failed = true; } -void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info) { +void Translate(IR::Block* block, u32 pc, std::span inst_list, + Info& info, const Profile& profile) { if (inst_list.empty()) { return; } - Translator translator{block, info}; + Translator translator{block, info, profile}; for (const auto& inst : inst_list) { pc += inst.length; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 1a0726af0..f57040d53 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -11,6 +11,7 @@ namespace Shader { struct Info; +struct Profile; } namespace Shader::Gcn { @@ -53,7 +54,7 @@ enum class NegateMode : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Info& info); + explicit Translator(IR::Block* block_, Info& info, const Profile& profile); // Instruction categories void EmitPrologue(); @@ -176,6 +177,7 @@ public: void V_CVT_FLR_I32_F32(const GcnInst& inst); void V_CMP_CLASS_F32(const GcnInst& inst); void V_FFBL_B32(const GcnInst& inst); + void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); @@ -216,9 +218,11 @@ private: private: IR::IREmitter ir; Info& info; + const Profile& profile; bool opcode_missing = false; }; -void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info); +void Translate(IR::Block* block, u32 block_base, std::span inst_list, + Info& info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 839e74720..11fffbebc 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/profile.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -292,6 +293,11 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U32(ConditionOp::GE, false, true, inst); case Opcode::V_CMPX_TRU_U32: return V_CMP_U32(ConditionOp::TRU, false, true, inst); + + case Opcode::V_MBCNT_LO_U32_B32: + return V_MBCNT_U32_B32(true, inst); + case Opcode::V_MBCNT_HI_U32_B32: + return V_MBCNT_U32_B32(false, inst); default: LogMissingOpcode(inst); } @@ -910,4 +916,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FindILsb(src0)); } +void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 lane_id = ir.LaneId(); + + const auto [warp_half, mask_shift] = [&]() -> std::pair { + if (profile.subgroup_size == 32) { + const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1)); + return std::make_pair(warp_half, lane_id); + } + const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5)); + const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F)); + return std::make_pair(warp_half, mask_shift); + }(); + + const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1)); + const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0)); + const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, + is_low ? ir.Imm32(~0U) : thread_mask, + is_low ? thread_mask : ir.Imm32(0))}; + const IR::U32 masked_value = ir.BitwiseAnd(src0, mask); + const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value)); + SetDst(inst.dst[0], result); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c06ce2813..03404aca0 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -373,6 +373,10 @@ U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } +U32 IREmitter::WarpId() { + return Inst(Opcode::WarpId); +} + U32 IREmitter::QuadShuffle(const U32& value, const U32& index) { return Inst(Opcode::QuadShuffle, value, index); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index ecebb75e6..a65e46136 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: BufferInstInfo info); [[nodiscard]] U32 LaneId(); + [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 28b145511..aa2fd3f82 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -326,4 +326,5 @@ OPCODE(ImageAtomicExchange32, U32, Opaq // Warp operations OPCODE(LaneId, U32, ) +OPCODE(WarpId, U32, ) OPCODE(QuadShuffle, U32, U32, U32 ) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 54b347300..badd54554 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -9,6 +9,7 @@ namespace Shader { struct Profile { u32 supported_spirv{0x00010000}; + u32 subgroup_size{}; bool unified_descriptor_binding{}; bool support_descriptor_aliasing{}; bool support_int8{}; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index f0c3e16a9..0773958d9 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -28,7 +28,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span token, const Info&& info) { + std::span token, const Info&& info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -49,7 +49,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, - std::span code, const Info&& info); + std::span code, const Info&& info, + const Profile& profile); } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0355aea72..0b6b2e00c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -167,7 +167,10 @@ bool Instance::CreateDevice() { const vk::StructureChain properties_chain = physical_device.getProperties2(); + vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, + vk::PhysicalDeviceVulkan11Properties>(); + subgroup_size = properties_chain.get().subgroupSize; + LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size); features = feature_chain.get().features; if (available_extensions.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 32965ddb1..a8c0dcf45 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -188,6 +188,11 @@ public: return properties.limits.nonCoherentAtomSize; } + /// Returns the subgroup size of the selected physical device. + u32 SubgroupSize() const { + return subgroup_size; + } + /// Returns the maximum supported elements in a texel buffer u32 MaxTexelBufferElements() const { return properties.limits.maxTexelBufferElements; @@ -249,6 +254,7 @@ private: bool workgroup_memory_explicit_layout{}; bool color_write_en{}; u64 min_imported_host_pointer_alignment{}; + u32 subgroup_size{}; bool tooling_info{}; bool debug_utils_supported{}; bool has_nsight_graphics{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 67994485a..4a37cdba4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); profile = Shader::Profile{ .supported_spirv = 0x00010600U, + .subgroup_size = instance.SubgroupSize(), .support_explicit_workgroup_layout = true, }; } @@ -268,7 +269,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); info.pgm_base = pgm->Address(); info.pgm_hash = hash; - programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), + profile); // Compile IR to SPIR-V auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding); @@ -308,7 +310,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { Shader::Info info = MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); info.pgm_base = cs_pgm.Address(); - auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); + auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), + profile); // Compile IR to SPIR-V u32 binding{};