From ed84d7460ca91e6fcc417cae85f3e37d9a77c835 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 16 Oct 2024 21:51:25 +0300 Subject: [PATCH] shader_recompiler: Implement more opcodes --- .../frontend/translate/scalar_alu.cpp | 24 +++++++++++++++---- .../frontend/translate/translate.h | 3 ++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 36c1ec85f..1e627d95c 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -92,8 +92,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { break; case Opcode::S_BREV_B32: return S_BREV_B32(inst); + case Opcode::S_BCNT1_I32_B64: + return S_BCNT1_I32_B64(inst); case Opcode::S_AND_SAVEEXEC_B64: - return S_AND_SAVEEXEC_B64(inst); + return S_SAVEEXEC_B64(NegateMode::None, false, inst); + case Opcode::S_ORN2_SAVEEXEC_B64: + return S_SAVEEXEC_B64(NegateMode::Src1, true, inst); default: LogMissingOpcode(inst); } @@ -540,11 +544,17 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { +void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { + const IR::U32 result = ir.BitCount(GetSrc(inst.src[0])); + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + +void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination // SGPR we have a special IR opcode for SPGRs that act as thread masks. - const IR::U1 exec{ir.GetExec()}; + IR::U1 exec{ir.GetExec()}; const IR::U1 src = [&] { switch (inst.src[0].field) { case OperandField::VccLo: @@ -568,7 +578,13 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { } // Update EXEC. - const IR::U1 result = ir.LogicalAnd(exec, src); + if (negate == NegateMode::Src1) { + exec = ir.LogicalNot(exec); + } + IR::U1 result = is_or ? ir.LogicalOr(exec, src) : ir.LogicalAnd(exec, src); + if (negate == NegateMode::Result) { + result = ir.LogicalNot(result); + } ir.SetExec(result); ir.SetScc(result); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index b70d4b829..be9c4d62c 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -108,8 +108,9 @@ public: void S_MOV_B64(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); + void S_BCNT1_I32_B64(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); - void S_AND_SAVEEXEC_B64(const GcnInst& inst); + void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); // SOPC void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);