From 116554e42534f7375836d6349cb1467dc9b3bc9a Mon Sep 17 00:00:00 2001
From: baggins183
Date: Mon, 28 Jul 2025 00:27:13 -0700
Subject: [PATCH] V_ALIGNBYTE_B32 and V_ALIGNBIT_B32 (#3316)

* implement V_ALIGNBYTE_B32 and V_ALIGNBIT_B32

* fix mask

* uncomment alignbit
---
 .../frontend/translate/translate.h            |  2 ++
 .../frontend/translate/vector_alu.cpp         | 33 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index ae904b822..d90806728 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -257,6 +257,8 @@ public:
     void V_CVT_PK_I16_I32(const GcnInst& inst);
     void V_CVT_PK_U8_F32(const GcnInst& inst);
     void V_LSHL_B64(const GcnInst& inst);
+    void V_ALIGNBIT_B32(const GcnInst& inst);
+    void V_ALIGNBYTE_B32(const GcnInst& inst);
     void V_MUL_F64(const GcnInst& inst);
     void V_MAX_F64(const GcnInst& inst);
     void V_MUL_LO_U32(const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 017c77fb0..20e49708f 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -390,6 +390,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_CVT_PK_U8_F32(inst);
     case Opcode::V_LSHL_B64:
         return V_LSHL_B64(inst);
+    case Opcode::V_ALIGNBIT_B32:
+        return V_ALIGNBIT_B32(inst);
+    case Opcode::V_ALIGNBYTE_B32:
+        return V_ALIGNBYTE_B32(inst);
     case Opcode::V_MUL_F64:
         return V_MUL_F64(inst);
     case Opcode::V_MAX_F64:
@@ -1296,6 +1300,35 @@ void Translator::V_LSHL_B64(const GcnInst& inst) {
     SetDst64(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm64(u64(0x3F)))));
 }
 
+// V_ALIGNBIT_B32: writes the low 32 bits of the 64-bit value {src0, src1} (src0 in the
+// upper half) shifted right by src2[4:0] bits.
+void Translator::V_ALIGNBIT_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
+    const IR::U32 lo{ir.ShiftRightLogical(src1, src2)};
+    // src0 << (32 - src2), split into two shifts so neither amount can reach 32: a
+    // 32-bit shift by 32 (the src2 == 0 case) is not well defined, and here it must yield 0.
+    const IR::U32 src0_shl1{ir.ShiftLeftLogical(src0, ir.Imm32(1))};
+    const IR::U32 hi{ir.ShiftLeftLogical(src0_shl1, ir.ISub(ir.Imm32(31), src2))};
+    SetDst(inst.dst[0], ir.BitwiseOr(lo, hi));
+}
+
+// V_ALIGNBYTE_B32: writes the low 32 bits of the 64-bit value {src0, src1} (src0 in the
+// upper half) shifted right by src2[1:0] bytes, i.e. by 0, 8, 16 or 24 bits.
+void Translator::V_ALIGNBYTE_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x3))};
+    const IR::U32 shift{ir.ShiftLeftLogical(src2, ir.Imm32(3))};
+    const IR::U32 lo{ir.ShiftRightLogical(src1, shift)};
+    // src0 << (32 - shift), split into two shifts so neither amount can reach 32: a
+    // 32-bit shift by 32 (the shift == 0 case) is not well defined, and here it must yield 0.
+    const IR::U32 src0_shl1{ir.ShiftLeftLogical(src0, ir.Imm32(1))};
+    const IR::U32 hi{ir.ShiftLeftLogical(src0_shl1, ir.ISub(ir.Imm32(31), shift))};
+    SetDst(inst.dst[0], ir.BitwiseOr(lo, hi));
+}
+
 void Translator::V_MUL_F64(const GcnInst& inst) {
     const IR::F64 src0{GetSrc64(inst.src[0])};
     const IR::F64 src1{GetSrc64(inst.src[1])};