diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2f606eb45..55a76da99 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -304,6 +304,7 @@ Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitCount64(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value); Id EmitFindSMsb32(EmitContext& ctx, Id value); Id EmitFindUMsb32(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 02af92385..323cf9b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -201,6 +201,10 @@ Id EmitBitCount32(EmitContext& ctx, Id value) { return ctx.OpBitCount(ctx.U32[1], value); } +Id EmitBitCount64(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U64, value); +} + Id EmitBitwiseNot32(EmitContext& ctx, Id value) { return ctx.OpNot(ctx.U32[1], value); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index f96fd0f40..181cad62e 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -100,6 +100,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BREV_B32(inst); case Opcode::S_BCNT1_I32_B32: return S_BCNT1_I32_B32(inst); + case Opcode::S_BCNT1_I32_B64: + return S_BCNT1_I32_B64(inst); case Opcode::S_FF1_I32_B32: return S_FF1_I32_B32(inst); case Opcode::S_AND_SAVEEXEC_B64: @@ -585,6 +587,12 @@ void Translator::S_BCNT1_I32_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { + const IR::U32 result = ir.BitCount(GetSrc64(inst.src[0])); + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_FF1_I32_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index fd4d8d86a..52ea31d66 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -111,6 +111,7 @@ public: void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); void S_BCNT1_I32_B32(const GcnInst& inst); + void S_BCNT1_I32_B64(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c241ec984..044f2aa01 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1273,8 +1273,15 @@ U32 IREmitter::BitReverse(const U32& value) { return Inst(Opcode::BitReverse32, value); } -U32 IREmitter::BitCount(const U32& value) { - return Inst(Opcode::BitCount32, value); +U32 IREmitter::BitCount(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::BitCount32, value); + case Type::U64: + return Inst(Opcode::BitCount64, value); + default: + ThrowInvalidType(value.Type()); + } } U32 IREmitter::BitwiseNot(const U32& value) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4cf44107e..35a60413e 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -229,7 +229,7 @@ public: [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); - [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitCount(const U32U64& value); [[nodiscard]] U32 BitwiseNot(const U32& value); [[nodiscard]] U32 FindSMsb(const U32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index aafd43ea8..769b76ef4 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -284,6 +284,7 @@ OPCODE(BitFieldSExtract, U32, U32, OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) OPCODE(BitReverse32, U32, U32, ) OPCODE(BitCount32, U32, U32, ) +OPCODE(BitCount64, U32, U64, ) OPCODE(BitwiseNot32, U32, U32, ) OPCODE(FindSMsb32, U32, U32, )