V_ALIGNBYTE_B32 and V_ALIGNBIT_B32 (#3316)

* implement V_ALIGNBYTE_B32 and V_ALIGNBIT_B32

* fix mask

* uncomment alignbit
This commit is contained in:
baggins183 2025-07-28 00:27:13 -07:00 committed by GitHub
parent df85efde7c
commit 116554e425
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 0 deletions

View File

@ -257,6 +257,8 @@ public:
void V_CVT_PK_I16_I32(const GcnInst& inst); void V_CVT_PK_I16_I32(const GcnInst& inst);
void V_CVT_PK_U8_F32(const GcnInst& inst); void V_CVT_PK_U8_F32(const GcnInst& inst);
void V_LSHL_B64(const GcnInst& inst); void V_LSHL_B64(const GcnInst& inst);
void V_ALIGNBIT_B32(const GcnInst& inst);
void V_ALIGNBYTE_B32(const GcnInst& inst);
void V_MUL_F64(const GcnInst& inst); void V_MUL_F64(const GcnInst& inst);
void V_MAX_F64(const GcnInst& inst); void V_MAX_F64(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst);

View File

@ -390,6 +390,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CVT_PK_U8_F32(inst); return V_CVT_PK_U8_F32(inst);
case Opcode::V_LSHL_B64: case Opcode::V_LSHL_B64:
return V_LSHL_B64(inst); return V_LSHL_B64(inst);
case Opcode::V_ALIGNBIT_B32:
return V_ALIGNBIT_B32(inst);
case Opcode::V_ALIGNBYTE_B32:
return V_ALIGNBYTE_B32(inst);
case Opcode::V_MUL_F64: case Opcode::V_MUL_F64:
return V_MUL_F64(inst); return V_MUL_F64(inst);
case Opcode::V_MAX_F64: case Opcode::V_MAX_F64:
@ -1296,6 +1300,25 @@ void Translator::V_LSHL_B64(const GcnInst& inst) {
SetDst64(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm64(u64(0x3F))))); SetDst64(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm64(u64(0x3F)))));
} }
/// Emits IR for V_ALIGNBIT_B32.
/// Treats src0 (high dword) and src1 (low dword) as one 64-bit value, shifts it right by
/// the low five bits of src2, and stores the low 32 bits of that result in dst[0].
void Translator::V_ALIGNBIT_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    // Only the low five bits of src2 select the shift amount (0..31).
    const IR::U32 shift{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
    const IR::U32 lo{ir.ShiftRightLogical(src1, shift)};
    // src0 supplies the upper bits of the result: conceptually src0 << (32 - shift).
    // A single shift by 32 (the shift == 0 case) has no defined result for a 32-bit operand
    // (e.g. SPIR-V OpShiftLeftLogical), and targets that wrap the count would OR all of src0
    // into the output. Splitting it into (31 - shift) followed by 1 keeps every count within
    // 0..31 and makes the high part exactly 0 when shift == 0.
    const IR::U32 hi_partial{ir.ShiftLeftLogical(src0, ir.ISub(ir.Imm32(31), shift))};
    const IR::U32 hi{ir.ShiftLeftLogical(hi_partial, ir.Imm32(1))};
    SetDst(inst.dst[0], ir.BitwiseOr(lo, hi));
}
/// Emits IR for V_ALIGNBYTE_B32.
/// Treats src0 (high dword) and src1 (low dword) as one 64-bit value, shifts it right by
/// 8 * (src2 & 3) bits, and stores the low 32 bits of that result in dst[0].
void Translator::V_ALIGNBYTE_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    // Only the low two bits of src2 select the byte offset (0..3).
    const IR::U32 byte_offset{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x3))};
    // Convert the byte offset to a bit count: 0, 8, 16 or 24.
    const IR::U32 shift{ir.ShiftLeftLogical(byte_offset, ir.Imm32(3))};
    const IR::U32 lo{ir.ShiftRightLogical(src1, shift)};
    // src0 supplies the upper bytes of the result: conceptually src0 << (32 - shift).
    // A single shift by 32 (the byte_offset == 0 case) has no defined result for a 32-bit
    // operand (e.g. SPIR-V OpShiftLeftLogical), and targets that wrap the count would OR all
    // of src0 into the output. Splitting it into (31 - shift) followed by 1 keeps every count
    // within 0..31 and makes the high part exactly 0 when shift == 0.
    const IR::U32 hi_partial{ir.ShiftLeftLogical(src0, ir.ISub(ir.Imm32(31), shift))};
    const IR::U32 hi{ir.ShiftLeftLogical(hi_partial, ir.Imm32(1))};
    SetDst(inst.dst[0], ir.BitwiseOr(lo, hi));
}
void Translator::V_MUL_F64(const GcnInst& inst) { void Translator::V_MUL_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])}; const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])}; const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};