From e23749bf0e23e5a9431565c8e08b90fa76ee8cda Mon Sep 17 00:00:00 2001
From: Vinicius Rangel
Date: Sun, 21 Jul 2024 05:03:21 -0300
Subject: [PATCH] impl V_ADDC_U32 & V_MAD_U64_U32

Add translation for V_ADDC_U32 (add with carry-in, carry-out written to VCC)
and V_MAD_U64_U32 (32x32 multiply accumulated into a 64-bit value).

Supporting IR changes:
- IMul accepts 64-bit operands (new IMul64 opcode and SPIR-V emitter).
- ConvertFToU and UConvert gain a 64-bit destination path.
- SetScalarReg64 / SetVectorReg64 store 64-bit values.
---
Review notes (not part of the commit message):
- ConvertU64U32 / ConvertU64F32 are added to opcodes.inc, but no SPIR-V emitter
  is added for them here; confirm the backend already provides them.
- SetScalarReg64 / SetVectorReg64 pass a 64-bit value to the existing
  SetScalarRegister / SetVectorRegister opcodes; confirm they accept it.
- V_ADDC_U32 combines its two carry flags with IREmitter::LogicalOr, which this
  patch does not touch; it is assumed to exist.
- V_ADDC_U32 adds the carry-in operand as a full 32-bit word; confirm the
  operand is already 0 or 1.
- Float-typed operands of these integer instructions are value-converted
  (ConvertFToU) rather than bit-cast; confirm that is intended.

 .../backend/spirv/emit_spirv_instructions.h   |  1 +
 .../backend/spirv/emit_spirv_integer.cpp      |  4 +
 .../frontend/translate/translate.cpp          |  6 ++
 .../frontend/translate/translate.h            |  2 +
 .../frontend/translate/vector_alu.cpp         | 91 +++++++++++++++++++
 src/shader_recompiler/ir/ir_emitter.cpp       | 42 ++++++++-
 src/shader_recompiler/ir/ir_emitter.h         |  4 +-
 src/shader_recompiler/ir/opcodes.inc          |  3 +
 src/shader_recompiler/ir/value.h              |  1 +
 9 files changed, 151 insertions(+), 3 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index e2b411e47..6c7a551ff 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -258,6 +258,7 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b);
 Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
 Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
 Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitIMul64(EmitContext& ctx, Id a, Id b);
 Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
 Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
 Id EmitINeg32(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index d5a0f2767..e2c73286f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -84,6 +84,10 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpIMul(ctx.U32[1], a, b);
 }
 
+Id EmitIMul64(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpIMul(ctx.U64, a, b);
+}
+
 Id EmitSDiv32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpSDiv(ctx.U32[1], a, b);
 }
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 6e5f7f8b0..abde32bef 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -319,6 +319,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l
         case Opcode::V_ADD_I32:
             translator.V_ADD_I32(inst);
             break;
+        case Opcode::V_ADDC_U32:
+            translator.V_ADDC_U32(inst);
+            break;
         case Opcode::V_CVT_F32_I32:
             translator.V_CVT_F32_I32(inst);
             break;
@@ -469,6 +472,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l
         case Opcode::IMAGE_LOAD:
             translator.IMAGE_LOAD(false, inst);
             break;
+        case Opcode::V_MAD_U64_U32:
+            translator.V_MAD_U64_U32(inst);
+            break;
         case Opcode::V_CMP_GE_I32:
             translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
             break;
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 2aa6f7124..287d7943e 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -100,6 +100,7 @@ public:
     void V_AND_B32(const GcnInst& inst);
     void V_LSHLREV_B32(const GcnInst& inst);
     void V_ADD_I32(const GcnInst& inst);
+    void V_ADDC_U32(const GcnInst& inst);
     void V_CVT_F32_I32(const GcnInst& inst);
     void V_CVT_F32_U32(const GcnInst& inst);
     void V_MAD_F32(const GcnInst& inst);
@@ -129,6 +130,7 @@ public:
     void V_CVT_U32_F32(const GcnInst& inst);
     void V_SUBREV_F32(const GcnInst& inst);
     void V_SUBREV_I32(const GcnInst& inst);
+    void V_MAD_U64_U32(const GcnInst& inst);
     void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
     void V_LSHRREV_B32(const GcnInst& inst);
     void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index ca648f882..036640364 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -92,6 +92,50 @@ void Translator::V_ADD_I32(const GcnInst& inst) {
     // TODO: Carry
 }
 
+// V_ADDC_U32: dst = src0 + src1 + carry-in; the unsigned carry-out is written to VCC.
+void Translator::V_ADDC_U32(const GcnInst& inst) {
+    IR::U32 src0;
+    const IR::Value src0_0{GetSrc(inst.src[0])};
+    if (src0_0.Type() == IR::Type::F32 || src0_0.Type() == IR::Type::F64) {
+        src0 = ir.ConvertFToU(32, IR::F32F64(src0_0));
+    } else if (src0_0.Type() == IR::Type::U32) {
+        src0 = IR::U32U64(src0_0);
+    } else {
+        UNREACHABLE();
+    }
+
+    IR::U32 src1;
+    const IR::Value src1_0{GetSrc(inst.src[1])};
+    if (src1_0.Type() == IR::Type::F32 || src1_0.Type() == IR::Type::F64) {
+        src1 = ir.ConvertFToU(32, IR::F32F64(src1_0));
+    } else if (src1_0.Type() == IR::Type::U32) {
+        src1 = IR::U32U64(src1_0);
+    } else {
+        UNREACHABLE();
+    }
+
+    // Carry-in: explicit third operand in the VOP3 encoding, VCC otherwise.
+    IR::U32 scarry;
+    if (inst.src_count == 3) { // VOP3
+        scarry = {GetSrc(inst.src[2])};
+    } else { // VOP2
+        scarry = {ir.GetVccLo()};
+    }
+
+    // Add in two steps so that a wrap-around in either step can be observed.
+    const IR::U32 partial = ir.IAdd(src0, src1);
+    const IR::U32 result = ir.IAdd(partial, scarry);
+
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, result);
+
+    // An unsigned 32-bit add wrapped iff its sum is smaller than the value it was
+    // added to. (Testing `result > 0xFFFFFFFF` cannot work: a 32-bit value never is.)
+    const IR::U1 carry_lo{ir.IGreaterThan(src0, partial, false)};
+    const IR::U1 carry_hi{ir.IGreaterThan(partial, result, false)};
+    ir.SetVcc(ir.LogicalOr(carry_lo, carry_hi));
+}
+
 void Translator::V_CVT_F32_I32(const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::VectorReg dst_reg{inst.dst[0].code};
@@ -294,6 +338,53 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
     // TODO: Carry-out
 }
 
+// V_MAD_U64_U32: 64-bit dst = src0 * src1 + src2, with src0/src1 32-bit and src2 64-bit.
+void Translator::V_MAD_U64_U32(const GcnInst& inst) {
+    IR::U32 src0;
+    const IR::Value src0_0{GetSrc(inst.src[0])};
+    if (src0_0.Type() == IR::Type::F32 || src0_0.Type() == IR::Type::F64) {
+        src0 = ir.ConvertFToU(32, IR::F32F64(src0_0));
+    } else if (src0_0.Type() == IR::Type::U64) {
+        src0 = ir.UConvert(32, IR::U64(src0_0));
+    } else if (src0_0.Type() == IR::Type::U32) {
+        src0 = IR::U32(src0_0);
+    } else {
+        UNREACHABLE();
+    }
+
+    IR::U32 src1;
+    const IR::Value src1_0{GetSrc(inst.src[1])};
+    if (src1_0.Type() == IR::Type::F32 || src1_0.Type() == IR::Type::F64) {
+        src1 = ir.ConvertFToU(32, IR::F32F64(src1_0));
+    } else if (src1_0.Type() == IR::Type::U64) {
+        src1 = ir.UConvert(32, IR::U64(src1_0));
+    } else if (src1_0.Type() == IR::Type::U32) {
+        src1 = IR::U32(src1_0);
+    } else {
+        UNREACHABLE();
+    }
+
+    IR::U64 src2;
+    const IR::Value src2_0{GetSrc(inst.src[2])};
+    if (src2_0.Type() == IR::Type::F32 || src2_0.Type() == IR::Type::F64) {
+        src2 = ir.ConvertFToU(64, IR::F32F64(src2_0));
+    } else if (src2_0.Type() == IR::Type::U64) {
+        src2 = IR::U64(src2_0);
+    } else if (src2_0.Type() == IR::Type::U32) {
+        src2 = ir.UConvert(64, IR::U32(src2_0));
+    } else {
+        UNREACHABLE();
+    }
+
+    // Widen both factors before multiplying: a 32-bit IMul would drop the upper
+    // half of the 64-bit product.
+    const IR::U64 product = ir.IMul(ir.UConvert(64, src0), ir.UConvert(64, src1));
+    const IR::U64 result = ir.IAdd(product, src2);
+
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg64(dst_reg, result);
+}
+
 void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index cd4fdaa29..6b0be5631 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -168,11 +168,23 @@ void IREmitter::SetScalarReg(IR::ScalarReg reg, const U32F32& value) {
     Inst(Opcode::SetScalarRegister, reg, value_typed);
 }
 
+// Writes a 64-bit value to a scalar register; an F64 value is bit-cast to U64 first.
+void IREmitter::SetScalarReg64(IR::ScalarReg reg, const U64F64& value) {
+    const U64 value_typed = value.Type() == Type::F64 ? BitCast(F64{value}) : U64{value};
+    Inst(Opcode::SetScalarRegister, reg, value_typed);
+}
+
 void IREmitter::SetVectorReg(IR::VectorReg reg, const U32F32& value) {
     const U32 value_typed = value.Type() == Type::F32 ? BitCast(F32{value}) : U32{value};
     Inst(Opcode::SetVectorRegister, reg, value_typed);
 }
 
+// Writes a 64-bit value to a vector register; an F64 value is bit-cast to U64 first.
+void IREmitter::SetVectorReg64(IR::VectorReg reg, const U64F64& value) {
+    const U64 value_typed = value.Type() == Type::F64 ? BitCast(F64{value}) : U64{value};
+    Inst(Opcode::SetVectorRegister, reg, value_typed);
+}
+
 U1 IREmitter::GetGotoVariable(u32 id) {
     return Inst(Opcode::GetGotoVariable, id);
 }
@@ -964,8 +976,18 @@ IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
     return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b);
 }
 
-U32 IREmitter::IMul(const U32& a, const U32& b) {
-    return Inst(Opcode::IMul32, a, b);
+U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst(Opcode::IMul32, a, b);
+    case Type::U64:
+        return Inst(Opcode::IMul64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
 }
 
 U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) {
@@ -1168,6 +1190,13 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
         default:
             ThrowInvalidType(value.Type());
         }
+    case 64:
+        switch (value.Type()) {
+        case Type::F32:
+            return Inst(Opcode::ConvertU64F32, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
     default:
         UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
     }
@@ -1227,6 +1256,15 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
         switch (value.Type()) {
         case Type::U32:
             return Inst(Opcode::ConvertU16U32, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst(Opcode::ConvertU64U32, value);
+        default:
+            ThrowInvalidType(value.Type());
         }
     }
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index e7512430a..4ce973ae1 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -57,7 +57,9 @@ public:
     template
     [[nodiscard]] T GetVectorReg(IR::VectorReg reg);
     void SetScalarReg(IR::ScalarReg reg, const U32F32& value);
+    void SetScalarReg64(IR::ScalarReg reg, const U64F64& value);
     void SetVectorReg(IR::VectorReg reg, const U32F32& value);
+    void SetVectorReg64(IR::VectorReg reg, const U64F64& value);
 
     [[nodiscard]] U1 GetGotoVariable(u32 id);
     void SetGotoVariable(u32 id, const U1& value);
@@ -159,7 +161,7 @@ public:
     [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
-    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+    [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
     [[nodiscard]] U32 IAbs(const U32& value);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 9aefc8b39..f5b0ff362 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -227,6 +227,7 @@ OPCODE(IAddCary32, U32x2, U32,
 OPCODE(ISub32,                                              U32,            U32,            U32,                                            )
 OPCODE(ISub64,                                              U64,            U64,            U64,                                            )
 OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
+OPCODE(IMul64,                                              U64,            U64,            U64,                                            )
 OPCODE(SMulExt,                                             U32x2,          U32,            U32,                                            )
 OPCODE(UMulExt,                                             U32x2,          U32,            U32,                                            )
 OPCODE(SDiv32,                                              U32,            U32,            U32,                                            )
@@ -289,6 +290,8 @@ OPCODE(ConvertF64S32, F64, U32,
 OPCODE(ConvertF64U32,                                       F64,            U32,                                                            )
 OPCODE(ConvertF32U16,                                       F32,            U16,                                                            )
 OPCODE(ConvertU16U32,                                       U16,            U32,                                                            )
+OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
+OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
 
 // Image operations
 OPCODE(ImageSampleImplicitLod,                              F32x4,          Opaque,         Opaque,         Opaque,         Opaque,         )
diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h
index a43c17f5b..db939eaa5 100644
--- a/src/shader_recompiler/ir/value.h
+++ b/src/shader_recompiler/ir/value.h
@@ -220,6 +220,7 @@ using F16 = TypedValue;
 using F32 = TypedValue;
 using F64 = TypedValue;
 using U32F32 = TypedValue;
+using U64F64 = TypedValue;
 using U32U64 = TypedValue;
 using U16U32U64 = TypedValue;
 using F32F64 = TypedValue;