mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 18:45:36 +00:00

Latest commit: shader_recompiler: Tessellation WIP
* fix compiler errors after merge; DONT MERGE: set log file to /dev/null; DONT MERGE: linux pthread bb fix; save work; DONT MERGE: dump ir; save more work; fix mistake with ES shader skip list; add input patch control points dynamic state; random stuff
* WIP Tessellation partial implementation. Squash commits
* test: make local/tcs use attr arrays
* attr arrays in TCS/TES
* dont define empty attr arrays
* switch to special opcodes for tess tcs/tes reads and tcs writes
* impl tcs/tes read attr insts
* rebase fix
* save some work
* save work probably broken and slow
* put Vertex LogicalStage after TCS and TES to fix bindings
* more refactors
* refactor pattern matching and optimize modulos (disabled)
* enable modulo opt
* copyright
* rebase fixes
* remove some prints
* remove some stuff
* Add TCS/TES support for shader patching and use LogicalStage
* refactor and handle wider DS instructions
* get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR
* stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs
* delete some more stuff
* update comments for current implementation
* some cleanup
* uint error
* more cleanup
* remove patch control points dynamic state (because runtime_info already depends on it)
* fix potential problem with determining passthrough

Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>

1316 lines · 46 KiB · C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h"

namespace Shader::Gcn {

void Translator::EmitVectorAlu(const GcnInst& inst) {
    switch (inst.opcode) {
        // VOP2
    case Opcode::V_CNDMASK_B32:
        return V_CNDMASK_B32(inst);
    case Opcode::V_READLANE_B32:
        return V_READLANE_B32(inst);
    case Opcode::V_WRITELANE_B32:
        return V_WRITELANE_B32(inst);
    case Opcode::V_ADD_F32:
        return V_ADD_F32(inst);
    case Opcode::V_SUB_F32:
        return V_SUB_F32(inst);
    case Opcode::V_SUBREV_F32:
        return V_SUBREV_F32(inst);
    case Opcode::V_MAC_LEGACY_F32:
        return V_MAC_F32(inst);
    case Opcode::V_MUL_LEGACY_F32:
        return V_MUL_F32(inst);
    case Opcode::V_MUL_F32:
        return V_MUL_F32(inst);
    case Opcode::V_MUL_I32_I24:
        return V_MUL_I32_I24(inst);
    case Opcode::V_MUL_U32_U24:
        return V_MUL_I32_I24(inst);
    case Opcode::V_MIN_LEGACY_F32:
        return V_MIN_F32(inst, true);
    case Opcode::V_MAX_LEGACY_F32:
        return V_MAX_F32(inst, true);
    case Opcode::V_MIN_F32:
        return V_MIN_F32(inst, false);
    case Opcode::V_MAX_F32:
        return V_MAX_F32(inst);
    case Opcode::V_MIN_I32:
        return V_MIN_I32(inst);
    case Opcode::V_MAX_I32:
        return V_MAX_U32(true, inst);
    case Opcode::V_MIN_U32:
        return V_MIN_U32(inst);
    case Opcode::V_MAX_U32:
        return V_MAX_U32(false, inst);
    case Opcode::V_LSHR_B32:
        return V_LSHR_B32(inst);
    case Opcode::V_LSHRREV_B32:
        return V_LSHRREV_B32(inst);
    case Opcode::V_ASHR_I32:
        return V_ASHR_I32(inst);
    case Opcode::V_ASHRREV_I32:
        return V_ASHRREV_I32(inst);
    case Opcode::V_LSHL_B32:
        return V_LSHL_B32(inst);
    case Opcode::V_LSHLREV_B32:
        return V_LSHLREV_B32(inst);
    case Opcode::V_AND_B32:
        return V_AND_B32(inst);
    case Opcode::V_OR_B32:
        return V_OR_B32(false, inst);
    case Opcode::V_XOR_B32:
        return V_OR_B32(true, inst);
    case Opcode::V_BFM_B32:
        return V_BFM_B32(inst);
    case Opcode::V_MAC_F32:
        return V_MAC_F32(inst);
    case Opcode::V_MADMK_F32:
        return V_MADMK_F32(inst);
    case Opcode::V_MADAK_F32:
        return V_FMA_F32(inst);
    case Opcode::V_BCNT_U32_B32:
        return V_BCNT_U32_B32(inst);
    case Opcode::V_MBCNT_LO_U32_B32:
        return V_MBCNT_U32_B32(true, inst);
    case Opcode::V_MBCNT_HI_U32_B32:
        return V_MBCNT_U32_B32(false, inst);
    case Opcode::V_ADD_I32:
        return V_ADD_I32(inst);
    case Opcode::V_SUB_I32:
        return V_SUB_I32(inst);
    case Opcode::V_SUBREV_I32:
        return V_SUBREV_I32(inst);
    case Opcode::V_ADDC_U32:
        return V_ADDC_U32(inst);
    case Opcode::V_SUBB_U32:
        return V_SUBB_U32(inst);
    case Opcode::V_SUBBREV_U32:
        return V_SUBBREV_U32(inst);
    case Opcode::V_LDEXP_F32:
        return V_LDEXP_F32(inst);
    case Opcode::V_CVT_PKNORM_U16_F32:
        return V_CVT_PKNORM_U16_F32(inst);
    case Opcode::V_CVT_PKRTZ_F16_F32:
        return V_CVT_PKRTZ_F16_F32(inst);

        // VOP1
    case Opcode::V_MOV_B32:
        return V_MOV(inst);
    case Opcode::V_READFIRSTLANE_B32:
        return V_READFIRSTLANE_B32(inst);
    case Opcode::V_CVT_I32_F64:
        return V_CVT_I32_F64(inst);
    case Opcode::V_CVT_F64_I32:
        return V_CVT_F64_I32(inst);
    case Opcode::V_CVT_F32_I32:
        return V_CVT_F32_I32(inst);
    case Opcode::V_CVT_F32_U32:
        return V_CVT_F32_U32(inst);
    case Opcode::V_CVT_U32_F32:
        return V_CVT_U32_F32(inst);
    case Opcode::V_CVT_I32_F32:
        return V_CVT_I32_F32(inst);
    case Opcode::V_CVT_F16_F32:
        return V_CVT_F16_F32(inst);
    case Opcode::V_CVT_F32_F16:
        return V_CVT_F32_F16(inst);
    case Opcode::V_CVT_FLR_I32_F32:
        return V_CVT_FLR_I32_F32(inst);
    case Opcode::V_CVT_F32_F64:
        return V_CVT_F32_F64(inst);
    case Opcode::V_CVT_F64_F32:
        return V_CVT_F64_F32(inst);
    case Opcode::V_CVT_RPI_I32_F32:
        return V_CVT_RPI_I32_F32(inst);
    case Opcode::V_CVT_OFF_F32_I4:
        return V_CVT_OFF_F32_I4(inst);
    case Opcode::V_CVT_F32_UBYTE0:
        return V_CVT_F32_UBYTE(0, inst);
    case Opcode::V_CVT_F32_UBYTE1:
        return V_CVT_F32_UBYTE(1, inst);
    case Opcode::V_CVT_F32_UBYTE2:
        return V_CVT_F32_UBYTE(2, inst);
    case Opcode::V_CVT_F32_UBYTE3:
        return V_CVT_F32_UBYTE(3, inst);
    case Opcode::V_FRACT_F32:
        return V_FRACT_F32(inst);
    case Opcode::V_TRUNC_F32:
        return V_TRUNC_F32(inst);
    case Opcode::V_CEIL_F32:
        return V_CEIL_F32(inst);
    case Opcode::V_RNDNE_F32:
        return V_RNDNE_F32(inst);
    case Opcode::V_FLOOR_F32:
        return V_FLOOR_F32(inst);
    case Opcode::V_EXP_F32:
        return V_EXP_F32(inst);
    case Opcode::V_LOG_F32:
        return V_LOG_F32(inst);
    case Opcode::V_RCP_F32:
        return V_RCP_F32(inst);
    case Opcode::V_RCP_F64:
        return V_RCP_F64(inst);
    case Opcode::V_RCP_IFLAG_F32:
        return V_RCP_F32(inst);
    case Opcode::V_RCP_CLAMP_F32:
        return V_RCP_F32(inst);
    case Opcode::V_RSQ_CLAMP_F32:
        return V_RSQ_F32(inst);
    case Opcode::V_RSQ_LEGACY_F32:
        return V_RSQ_F32(inst);
    case Opcode::V_RSQ_F32:
        return V_RSQ_F32(inst);
    case Opcode::V_SQRT_F32:
        return V_SQRT_F32(inst);
    case Opcode::V_SIN_F32:
        return V_SIN_F32(inst);
    case Opcode::V_COS_F32:
        return V_COS_F32(inst);
    case Opcode::V_NOT_B32:
        return V_NOT_B32(inst);
    case Opcode::V_BFREV_B32:
        return V_BFREV_B32(inst);
    case Opcode::V_FFBH_U32:
        return V_FFBH_U32(inst);
    case Opcode::V_FFBL_B32:
        return V_FFBL_B32(inst);
    case Opcode::V_FREXP_EXP_I32_F64:
        return V_FREXP_EXP_I32_F64(inst);
    case Opcode::V_FREXP_MANT_F64:
        return V_FREXP_MANT_F64(inst);
    case Opcode::V_FRACT_F64:
        return V_FRACT_F64(inst);
    case Opcode::V_FREXP_EXP_I32_F32:
        return V_FREXP_EXP_I32_F32(inst);
    case Opcode::V_FREXP_MANT_F32:
        return V_FREXP_MANT_F32(inst);
    case Opcode::V_MOVRELD_B32:
        return V_MOVRELD_B32(inst);
    case Opcode::V_MOVRELS_B32:
        return V_MOVRELS_B32(inst);
    case Opcode::V_MOVRELSD_B32:
        return V_MOVRELSD_B32(inst);

        // VOPC
        // V_CMP_{OP16}_F32
    case Opcode::V_CMP_F_F32:
        return V_CMP_F32(ConditionOp::F, false, inst);
    case Opcode::V_CMP_LT_F32:
        return V_CMP_F32(ConditionOp::LT, false, inst);
    case Opcode::V_CMP_EQ_F32:
        return V_CMP_F32(ConditionOp::EQ, false, inst);
    case Opcode::V_CMP_LE_F32:
        return V_CMP_F32(ConditionOp::LE, false, inst);
    case Opcode::V_CMP_GT_F32:
        return V_CMP_F32(ConditionOp::GT, false, inst);
    case Opcode::V_CMP_LG_F32:
        return V_CMP_F32(ConditionOp::LG, false, inst);
    case Opcode::V_CMP_GE_F32:
        return V_CMP_F32(ConditionOp::GE, false, inst);
    case Opcode::V_CMP_U_F32:
        return V_CMP_F32(ConditionOp::U, false, inst);
    case Opcode::V_CMP_NGE_F32:
        return V_CMP_F32(ConditionOp::LT, false, inst);
    case Opcode::V_CMP_NGT_F32:
        return V_CMP_F32(ConditionOp::LE, false, inst);
    case Opcode::V_CMP_NLE_F32:
        return V_CMP_F32(ConditionOp::GT, false, inst);
    case Opcode::V_CMP_NEQ_F32:
        return V_CMP_F32(ConditionOp::LG, false, inst);
    case Opcode::V_CMP_NLT_F32:
        return V_CMP_F32(ConditionOp::GE, false, inst);

        // V_CMPX_{OP16}_F32
    case Opcode::V_CMPX_F_F32:
        return V_CMP_F32(ConditionOp::F, true, inst);
    case Opcode::V_CMPX_LT_F32:
        return V_CMP_F32(ConditionOp::LT, true, inst);
    case Opcode::V_CMPX_EQ_F32:
        return V_CMP_F32(ConditionOp::EQ, true, inst);
    case Opcode::V_CMPX_LE_F32:
        return V_CMP_F32(ConditionOp::LE, true, inst);
    case Opcode::V_CMPX_GT_F32:
        return V_CMP_F32(ConditionOp::GT, true, inst);
    case Opcode::V_CMPX_LG_F32:
        return V_CMP_F32(ConditionOp::LG, true, inst);
    case Opcode::V_CMPX_GE_F32:
        return V_CMP_F32(ConditionOp::GE, true, inst);
    case Opcode::V_CMPX_NGE_F32:
        return V_CMP_F32(ConditionOp::LT, true, inst);
    case Opcode::V_CMPX_NLG_F32:
        return V_CMP_F32(ConditionOp::EQ, true, inst);
    case Opcode::V_CMPX_NGT_F32:
        return V_CMP_F32(ConditionOp::LE, true, inst);
    case Opcode::V_CMPX_NLE_F32:
        return V_CMP_F32(ConditionOp::GT, true, inst);
    case Opcode::V_CMPX_NEQ_F32:
        return V_CMP_F32(ConditionOp::LG, true, inst);
    case Opcode::V_CMPX_NLT_F32:
        return V_CMP_F32(ConditionOp::GE, true, inst);
    case Opcode::V_CMPX_TRU_F32:
        return V_CMP_F32(ConditionOp::TRU, true, inst);

        // V_CMP_{OP8}_I32
    case Opcode::V_CMP_LT_I32:
        return V_CMP_U32(ConditionOp::LT, true, false, inst);
    case Opcode::V_CMP_EQ_I32:
        return V_CMP_U32(ConditionOp::EQ, true, false, inst);
    case Opcode::V_CMP_LE_I32:
        return V_CMP_U32(ConditionOp::LE, true, false, inst);
    case Opcode::V_CMP_GT_I32:
        return V_CMP_U32(ConditionOp::GT, true, false, inst);
    case Opcode::V_CMP_NE_I32:
        return V_CMP_U32(ConditionOp::LG, true, false, inst);
    case Opcode::V_CMP_GE_I32:
        return V_CMP_U32(ConditionOp::GE, true, false, inst);

        // V_CMPX_{OP8}_I32
    case Opcode::V_CMPX_LT_I32:
        return V_CMP_U32(ConditionOp::LT, true, true, inst);
    case Opcode::V_CMPX_EQ_I32:
        return V_CMP_U32(ConditionOp::EQ, true, true, inst);
    case Opcode::V_CMPX_LE_I32:
        return V_CMP_U32(ConditionOp::LE, true, true, inst);
    case Opcode::V_CMPX_GT_I32:
        return V_CMP_U32(ConditionOp::GT, true, true, inst);
    case Opcode::V_CMPX_LG_I32:
        return V_CMP_U32(ConditionOp::LG, true, true, inst);
    case Opcode::V_CMPX_GE_I32:
        return V_CMP_U32(ConditionOp::GE, true, true, inst);

        // V_CMP_{OP8}_U32
    case Opcode::V_CMP_F_U32:
        return V_CMP_U32(ConditionOp::F, false, false, inst);
    case Opcode::V_CMP_LT_U32:
        return V_CMP_U32(ConditionOp::LT, false, false, inst);
    case Opcode::V_CMP_EQ_U32:
        return V_CMP_U32(ConditionOp::EQ, false, false, inst);
    case Opcode::V_CMP_LE_U32:
        return V_CMP_U32(ConditionOp::LE, false, false, inst);
    case Opcode::V_CMP_GT_U32:
        return V_CMP_U32(ConditionOp::GT, false, false, inst);
    case Opcode::V_CMP_NE_U32:
        return V_CMP_U32(ConditionOp::LG, false, false, inst);
    case Opcode::V_CMP_GE_U32:
        return V_CMP_U32(ConditionOp::GE, false, false, inst);
    case Opcode::V_CMP_TRU_U32:
        return V_CMP_U32(ConditionOp::TRU, false, false, inst);

        // V_CMPX_{OP8}_U32
    case Opcode::V_CMPX_F_U32:
        return V_CMP_U32(ConditionOp::F, false, true, inst);
    case Opcode::V_CMPX_LT_U32:
        return V_CMP_U32(ConditionOp::LT, false, true, inst);
    case Opcode::V_CMPX_EQ_U32:
        return V_CMP_U32(ConditionOp::EQ, false, true, inst);
    case Opcode::V_CMPX_LE_U32:
        return V_CMP_U32(ConditionOp::LE, false, true, inst);
    case Opcode::V_CMPX_GT_U32:
        return V_CMP_U32(ConditionOp::GT, false, true, inst);
    case Opcode::V_CMPX_NE_U32:
        return V_CMP_U32(ConditionOp::LG, false, true, inst);
    case Opcode::V_CMPX_GE_U32:
        return V_CMP_U32(ConditionOp::GE, false, true, inst);
    case Opcode::V_CMPX_TRU_U32:
        return V_CMP_U32(ConditionOp::TRU, false, true, inst);

        // V_CMP_{OP8}_U64
    case Opcode::V_CMP_NE_U64:
        return V_CMP_NE_U64(inst);

    case Opcode::V_CMP_CLASS_F32:
        return V_CMP_CLASS_F32(inst);

        // VOP3a
    case Opcode::V_MAD_LEGACY_F32:
        return V_MAD_F32(inst);
    case Opcode::V_MAD_F32:
        return V_MAD_F32(inst);
    case Opcode::V_MAD_I32_I24:
        return V_MAD_I32_I24(inst);
    case Opcode::V_MAD_U32_U24:
        return V_MAD_U32_U24(inst);
    case Opcode::V_CUBEID_F32:
        return V_CUBEID_F32(inst);
    case Opcode::V_CUBESC_F32:
        return V_CUBESC_F32(inst);
    case Opcode::V_CUBETC_F32:
        return V_CUBETC_F32(inst);
    case Opcode::V_CUBEMA_F32:
        return V_CUBEMA_F32(inst);
    case Opcode::V_BFE_U32:
        return V_BFE_U32(false, inst);
    case Opcode::V_BFE_I32:
        return V_BFE_U32(true, inst);
    case Opcode::V_BFI_B32:
        return V_BFI_B32(inst);
    case Opcode::V_FMA_F32:
        return V_FMA_F32(inst);
    case Opcode::V_FMA_F64:
        return V_FMA_F64(inst);
    case Opcode::V_MIN3_F32:
        return V_MIN3_F32(inst);
    case Opcode::V_MIN3_I32:
        return V_MIN3_I32(inst);
    case Opcode::V_MIN3_U32:
        return V_MIN3_U32(inst);
    case Opcode::V_MAX3_F32:
        return V_MAX3_F32(inst);
    case Opcode::V_MAX3_I32:
        return V_MAX3_U32(true, inst);
    case Opcode::V_MAX3_U32:
        return V_MAX3_U32(false, inst);
    case Opcode::V_MED3_F32:
        return V_MED3_F32(inst);
    case Opcode::V_MED3_I32:
        return V_MED3_I32(inst);
    case Opcode::V_MED3_U32:
        return V_MED3_U32(inst);
    case Opcode::V_SAD_U32:
        return V_SAD_U32(inst);
    case Opcode::V_CVT_PK_U16_U32:
        return V_CVT_PK_U16_U32(inst);
    case Opcode::V_CVT_PK_U8_F32:
        return V_CVT_PK_U8_F32(inst);
    case Opcode::V_LSHL_B64:
        return V_LSHL_B64(inst);
    case Opcode::V_MUL_F64:
        return V_MUL_F64(inst);
    case Opcode::V_MAX_F64:
        return V_MAX_F64(inst);
    case Opcode::V_MUL_LO_U32:
        return V_MUL_LO_U32(inst);
    case Opcode::V_MUL_HI_U32:
        return V_MUL_HI_U32(false, inst);
    case Opcode::V_MUL_LO_I32:
        return V_MUL_LO_U32(inst);
    case Opcode::V_MAD_U64_U32:
        return V_MAD_U64_U32(inst);
    case Opcode::V_NOP:
        return;
    default:
        LogMissingOpcode(inst);
    }
}

// VOP2

void Translator::V_CNDMASK_B32(const GcnInst& inst) {
    const IR::ScalarReg flag_reg{inst.src[2].code};
    const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
                            ? ir.GetThreadBitScalarReg(flag_reg)
                            : ir.GetVcc();
    const IR::Value result =
        ir.Select(flag, GetSrc<IR::F32>(inst.src[1]), GetSrc<IR::F32>(inst.src[0]));
    SetDst(inst.dst[0], IR::U32F32{result});
}

void Translator::V_ADD_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    SetDst(inst.dst[0], ir.FPAdd(src0, src1));
}

void Translator::V_SUB_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    SetDst(inst.dst[0], ir.FPSub(src0, src1));
}

void Translator::V_SUBREV_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    SetDst(inst.dst[0], ir.FPSub(src1, src0));
}

void Translator::V_MUL_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
}

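// Models the GCN 24-bit multiply: each operand is truncated to its low 24 bits
// (sign-extended here) before a 32-bit multiply, which the hardware guarantees
// to be exact. For example 0x00FFFFFF sign-extends to -1, so
// v_mul_i32_i24(0x00FFFFFF, 2) = -2 (0xFFFFFFFE). Note that V_MUL_U32_U24 is
// currently dispatched through this signed handler; the two interpretations
// differ only when bit 23 of an operand is set.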
void Translator::V_MUL_I32_I24(const GcnInst& inst) {
    const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)};
    const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)};
    SetDst(inst.dst[0], ir.IMul(src0, src1));
}

void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
}

void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
}

void Translator::V_MIN_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.SMin(src0, src1));
}

void Translator::V_MIN_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IMin(src0, src1, false));
}

void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed));
}

void Translator::V_LSHR_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}

void Translator::V_LSHRREV_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}

void Translator::V_ASHR_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}

void Translator::V_ASHRREV_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}

void Translator::V_LSHL_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}

void Translator::V_LSHLREV_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}

void Translator::V_AND_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
    SetDst(inst.dst[0], ir.BitwiseAnd(src0, src1));
}

void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
    SetDst(inst.dst[0], is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1)));
}

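// V_BFM_B32 builds a bit-field mask: ((1 << width) - 1) << offset, where width
// and offset come from the low 5 bits of src0/src1 respectively. For example
// width = 4, offset = 8 gives ((1 << 4) - 1) << 8 = 0x00000F00.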
void Translator::V_BFM_B32(const GcnInst& inst) {
    // bitmask width (src0[4:0] per the ISA manual)
    const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(5))};
    // bitmask offset (src1[4:0])
    const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(5))};
    const IR::U32 ones = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), src0), ir.Imm32(1));
    SetDst(inst.dst[0], ir.ShiftLeftLogical(ones, src1));
}

void Translator::V_MAC_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPFma(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]),
                                 GetSrc<IR::F32>(inst.dst[0])));
}

void Translator::V_MADMK_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 k{GetSrc<IR::F32>(inst.src[2])};
    SetDst(inst.dst[0], ir.FPFma(src0, k, src1));
}

void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1));
}

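// mbcnt counts the set bits of a source mask at positions below the current
// lane, so a hi/lo pair over a full ~0 mask yields the lane id. Rather than
// model the bit counting, this handler folds the known idioms games emit (see
// the inline comments) into LaneId(), a constant 0, or a no-op.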
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
    if (!is_low) {
        // v_mbcnt_hi_u32_b32 v2, -1, 0
        if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
            inst.src[1].field == OperandField::ConstZero) {
            return;
        }
        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0
        if (inst.src[0].field == OperandField::ExecHi &&
            inst.src[1].field == OperandField::ConstZero) {
            return;
        }
    } else {
        // v_mbcnt_lo_u32_b32 v2, -1, vX
        // used combined with above to fetch lane id in non-compute stages
        if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
            SetDst(inst.dst[0], ir.LaneId());
        }
        // v_mbcnt_lo_u32_b32 v20, exec_lo, vX
        // used combined with above for append buffer indexing.
        if (inst.src[0].field == OperandField::ExecLo) {
            SetDst(inst.dst[0], ir.Imm32(0));
        }
    }
}

void Translator::V_ADD_I32(const GcnInst& inst) {
    // Signed or unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
    const IR::U32 result{ir.IAdd(src0, src1)};
    SetDst(inst.dst[0], result);

    // TODO: Carry-out with signed or unsigned components
}

void Translator::V_SUB_I32(const GcnInst& inst) {
    // Unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 result{ir.ISub(src0, src1)};
    SetDst(inst.dst[0], result);

    const IR::U1 did_underflow{ir.IGreaterThan(src1, src0, false)};
    SetCarryOut(inst, did_underflow);
}

void Translator::V_SUBREV_I32(const GcnInst& inst) {
    // Unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 result{ir.ISub(src1, src0)};
    SetDst(inst.dst[0], result);

    const IR::U1 did_underflow{ir.IGreaterThan(src0, src1, false)};
    SetCarryOut(inst, did_underflow);
}

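// Carry detection below relies on unsigned wraparound: for a + b (+ carry),
// the 32-bit result is smaller than an operand iff the addition overflowed,
// e.g. 0xFFFFFFFF + 2 = 1 < 2. The borrow checks in V_SUB_I32/V_SUBREV_I32
// above use the analogous src1 > src0 test.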
void Translator::V_ADDC_U32(const GcnInst& inst) {
    // Unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 carry{GetCarryIn(inst)};
    const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)};
    SetDst(inst.dst[0], result);

    const IR::U1 less_src0{ir.ILessThan(result, src0, false)};
    const IR::U1 less_src1{ir.ILessThan(result, src1, false)};
    const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)};
    SetCarryOut(inst, did_overflow);
}

void Translator::V_SUBB_U32(const GcnInst& inst) {
    // Signed or unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 carry{GetCarryIn(inst)};
    const IR::U32 result{ir.ISub(ir.ISub(src0, src1), carry)};
    SetDst(inst.dst[0], result);

    // TODO: Carry-out with signed or unsigned components
}

void Translator::V_SUBBREV_U32(const GcnInst& inst) {
    // Signed or unsigned components
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 carry{GetCarryIn(inst)};
    const IR::U32 result{ir.ISub(ir.ISub(src1, src0), carry)};
    SetDst(inst.dst[0], result);

    // TODO: Carry-out with signed or unsigned components
}

void Translator::V_LDEXP_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}

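// Packs two floats as 16-bit normalized values:
//   dst = u16(src0 * 65535) | (u16(src1 * 65535) << 16).
// Note: this is a simplification; the hardware conversion also clamps the
// inputs to [0, 1] before scaling, which is not modeled here, so out-of-range
// inputs may pack differently than on real hardware.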
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
    const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
    const IR::VectorReg dst_reg{inst.dst[0].code};
    ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
}

void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
    const IR::Value vec_f32 =
        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
    SetDst(inst.dst[0], ir.PackHalf2x16(vec_f32));
}

// VOP1

void Translator::V_MOV(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
}

void Translator::V_CVT_I32_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
}

void Translator::V_CVT_F64_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst64(inst.dst[0], ir.ConvertSToF(64, 32, src0));
}

void Translator::V_CVT_F32_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertSToF(32, 32, src0));
}

void Translator::V_CVT_F32_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertUToF(32, 32, src0));
}

void Translator::V_CVT_U32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertFToU(32, src0));
}

void Translator::V_CVT_I32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
}

void Translator::V_CVT_F16_F32(const GcnInst& inst) {
    const IR::F32 src0 = GetSrc<IR::F32>(inst.src[0]);
    const IR::F16 src0fp16 = ir.FPConvert(16, src0);
    SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
}

void Translator::V_CVT_F32_F16(const GcnInst& inst) {
    const IR::U32 src0 = GetSrc(inst.src[0]);
    const IR::U16 src0l = ir.UConvert(16, src0);
    SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}

void Translator::V_CVT_RPI_I32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(ir.FPAdd(src0, ir.Imm32(0.5f)))));
}

void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
}

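// V_CVT_OFF_F32_I4 maps a signed 4-bit value to a 1/16-step offset:
// dst = s4 / 16.0f, where s4 is the low nibble of src0 reinterpreted as
// two's complement (0..7 -> 0.0..0.4375, 8..15 -> -0.5..-0.0625). The table
// below enumerates exactly that; only immediate sources are supported here,
// hence the assert.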
void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    ASSERT(src0.IsImmediate());
    static constexpr std::array IntToFloat = {
        0.0f,     0.0625f,  0.1250f,  0.1875f,  0.2500f,  0.3125f,  0.3750f,  0.4375f,
        -0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f};
    SetDst(inst.dst[0], ir.Imm32(IntToFloat[src0.U32() & 0xF]));
}

void Translator::V_CVT_F32_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPConvert(32, src0));
}

void Translator::V_CVT_F64_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPConvert(64, src0));
}

void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8));
    SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte));
}

void Translator::V_FRACT_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPFract(src0));
}

void Translator::V_TRUNC_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPTrunc(src0));
}

void Translator::V_CEIL_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPCeil(src0));
}

void Translator::V_RNDNE_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPRoundEven(src0));
}

void Translator::V_FLOOR_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPFloor(src0));
}

void Translator::V_EXP_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPExp2(src0));
}

void Translator::V_LOG_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPLog2(src0));
}

void Translator::V_RCP_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPRecip(src0));
}

void Translator::V_RCP_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPRecip(src0));
}

void Translator::V_RSQ_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
}

void Translator::V_SQRT_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPSqrt(src0));
}

void Translator::V_SIN_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPSin(src0));
}

void Translator::V_COS_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPCos(src0));
}

void Translator::V_NOT_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst(inst.dst[0], ir.BitwiseNot(src0));
}

void Translator::V_BFREV_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst(inst.dst[0], ir.BitReverse(src0));
}

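// FFBH reports the position of the first set bit counting from the MSB side,
// i.e. the number of leading zeroes. For example src = 0x00F00000 has its MSB
// at bit 23, so FindUMsb returns 23 and the reported position is 31 - 23 = 8;
// a zero source yields ~0 (0xFFFFFFFF).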
void Translator::V_FFBH_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost
    // (LSB) position
    const IR::U32 msb_pos = ir.FindUMsb(src0);
    const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos);
    // Select 0xFFFFFFFF if src0 was 0
    const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0));
    SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
}

void Translator::V_FFBL_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    SetDst(inst.dst[0], ir.FindILsb(src0));
}

void Translator::V_FREXP_EXP_I32_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}

void Translator::V_FREXP_MANT_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPFrexpSig(src0));
}

void Translator::V_FRACT_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    SetDst64(inst.dst[0], ir.FPFract(src0));
}

void Translator::V_FREXP_EXP_I32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPFrexpExp(src0));
}

void Translator::V_FREXP_MANT_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    SetDst(inst.dst[0], ir.FPFrexpSig(src0));
}

void Translator::V_MOVRELD_B32(const GcnInst& inst) {
    const IR::U32 src_val{GetSrc(inst.src[0])};
    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
    IR::U32 m0 = ir.GetM0();

    VMovRelDHelper(dst_vgprno, src_val, m0);
}

void Translator::V_MOVRELS_B32(const GcnInst& inst) {
    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
    const IR::U32 m0 = ir.GetM0();

    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
    SetDst(inst.dst[0], src_val);
}

void Translator::V_MOVRELSD_B32(const GcnInst& inst) {
    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
    IR::U32 m0 = ir.GetM0();

    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
    VMovRelDHelper(dst_vgprno, src_val, m0);
}

// VOPC

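// VOPC compares produce a per-lane condition bit. Since each lane is
// translated independently here, the result is a single U1 written to VCC or
// to an SGPR pair via SetThreadBitScalarReg. The CMPX forms additionally write
// the result to EXEC, masking off lanes that fail the compare, e.g.
//   v_cmpx_gt_f32 vcc, v0, v1   ; vcc = exec = (v0 > v1), per lane.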
void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::U1 result = [&] {
        switch (op) {
        case ConditionOp::F:
            return ir.Imm1(false);
        case ConditionOp::TRU:
            return ir.Imm1(true);
        case ConditionOp::EQ:
            return ir.FPEqual(src0, src1);
        case ConditionOp::LG:
            return ir.FPNotEqual(src0, src1);
        case ConditionOp::GT:
            return ir.FPGreaterThan(src0, src1);
        case ConditionOp::LT:
            return ir.FPLessThan(src0, src1);
        case ConditionOp::LE:
            return ir.FPLessThanEqual(src0, src1);
        case ConditionOp::GE:
            return ir.FPGreaterThanEqual(src0, src1);
        case ConditionOp::U:
            return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
        default:
            UNREACHABLE();
        }
    }();
    if (set_exec) {
        ir.SetExec(result);
    }

    switch (inst.dst[1].field) {
    case OperandField::VccLo:
        ir.SetVcc(result);
        break;
    case OperandField::ScalarGPR:
        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
        break;
    default:
        UNREACHABLE();
    }
}

void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U1 result = [&] {
        switch (op) {
        case ConditionOp::F:
            return ir.Imm1(false);
        case ConditionOp::TRU:
            return ir.Imm1(true);
        case ConditionOp::EQ:
            return ir.IEqual(src0, src1);
        case ConditionOp::LG:
            return ir.INotEqual(src0, src1);
        case ConditionOp::GT:
            return ir.IGreaterThan(src0, src1, is_signed);
        case ConditionOp::LT:
            return ir.ILessThan(src0, src1, is_signed);
        case ConditionOp::LE:
            return ir.ILessThanEqual(src0, src1, is_signed);
        case ConditionOp::GE:
            return ir.IGreaterThanEqual(src0, src1, is_signed);
        default:
            UNREACHABLE();
        }
    }();
    if (set_exec) {
        ir.SetExec(result);
    }
    switch (inst.dst[1].field) {
    case OperandField::VccLo:
        return ir.SetVcc(result);
    case OperandField::ScalarGPR:
        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
    default:
        UNREACHABLE();
    }
}

void Translator::V_CMP_NE_U64(const GcnInst& inst) {
    const auto get_src = [&](const InstOperand& operand) {
        switch (operand.field) {
        case OperandField::VccLo:
            return ir.GetVcc();
        case OperandField::ExecLo:
            return ir.GetExec();
        case OperandField::ScalarGPR:
            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
        case OperandField::ConstZero:
            return ir.Imm1(false);
        default:
            UNREACHABLE();
        }
    };
    const IR::U1 src0{get_src(inst.src[0])};
    ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
    switch (inst.dst[1].field) {
    case OperandField::VccLo:
        ir.SetVcc(src0);
        break;
    case OperandField::ScalarGPR:
        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
        break;
    default:
        UNREACHABLE();
    }
}

void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    IR::U1 value;
    if (src1.IsImmediate()) {
        const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
        if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
            value = ir.FPIsNan(src0);
        } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
            value = ir.FPIsInf(src0);
        } else {
            UNREACHABLE();
        }
    } else {
        // We don't know the type yet, delay its resolution.
        value = ir.FPCmpClass32(src0, src1);
    }

    switch (inst.dst[1].field) {
    case OperandField::VccLo:
        return ir.SetVcc(value);
    case OperandField::ScalarGPR:
        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), value);
    default:
        UNREACHABLE();
    }
}

// VOP3a

void Translator::V_MAD_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}

void Translator::V_MAD_I32_I24(const GcnInst& inst, bool is_signed) {
    const IR::U32 src0{
        ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};
    const IR::U32 src1{
        ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), is_signed)};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0], ir.IAdd(ir.IMul(src0, src1), src2));
}

void Translator::V_MAD_U32_U24(const GcnInst& inst) {
    V_MAD_I32_I24(inst, false);
}

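// Cube-map helpers (face id, s/t coordinates, major-axis magnitude). These
// look like placeholder approximations rather than real face selection: they
// forward src2/src0/src1 and return a constant 1.0 for the major axis instead
// of comparing |x|, |y|, |z| to pick the dominant face.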
void Translator::V_CUBEID_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
}

void Translator::V_CUBESC_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
}

void Translator::V_CUBETC_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
}

void Translator::V_CUBEMA_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.Imm32(1.f));
}

void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    IR::U32 src1{GetSrc(inst.src[1])};
    IR::U32 src2{GetSrc(inst.src[2])};
    if (!src1.IsImmediate()) {
        src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
    }
    if (!src2.IsImmediate()) {
        src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
    }
    SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
}

void Translator::V_BFI_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0],
           ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2)));
}

void Translator::V_FMA_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}

void Translator::V_FMA_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};
    const IR::F64 src2{GetSrc64<IR::F64>(inst.src[2])};
    SetDst64(inst.dst[0], ir.FPFma(src0, src1, src2));
}

void Translator::V_MIN3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}

void Translator::V_MIN3_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
}

void Translator::V_MIN3_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0], ir.UMin(src0, ir.UMin(src1, src2)));
}

void Translator::V_MAX3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
}

void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed));
}

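// The MED3 handlers use the identity med3(a, b, c) = max(min(a, b),
// min(max(a, b), c)). For example (a, b, c) = (5, 1, 3) gives
// max(min(5, 1), min(max(5, 1), 3)) = max(1, 3) = 3, the median.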
void Translator::V_MED3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
    const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
    SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
}

void Translator::V_MED3_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2);
    SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx));
}

void Translator::V_MED3_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    const IR::U32 mmx = ir.UMin(ir.UMax(src0, src1), src2);
    SetDst(inst.dst[0], ir.UMax(ir.UMin(src0, src1), mmx));
}

void Translator::V_SAD(const GcnInst& inst) {
    const IR::U32 abs_diff = ir.IAbs(ir.ISub(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
    SetDst(inst.dst[0], ir.IAdd(abs_diff, GetSrc(inst.src[2])));
}

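// V_SAD_U32 accumulates |src0 - src1| + src2. Unlike the signed V_SAD helper
// above, the unsigned absolute difference is computed branchlessly as
// max(src0, src1) - min(src0, src1), e.g. |3 - 10| = 10 - 3 = 7; the immediate
// zero special cases simply skip the max/min when one side is a known 0.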
void Translator::V_SAD_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    IR::U32 result;
    if (src0.IsImmediate() && src0.U32() == 0U) {
        result = src1;
    } else if (src1.IsImmediate() && src1.U32() == 0U) {
        result = src0;
    } else {
        const IR::U32 max{ir.IMax(src0, src1, false)};
        const IR::U32 min{ir.IMin(src0, src1, false)};
        result = ir.ISub(max, min);
    }
    SetDst(inst.dst[0], ir.IAdd(result, src2));
}

void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
    const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
    SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
}

void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};

    const IR::U32 value_uint = ir.ConvertFToU(32, src0);
    const IR::U32 offset = ir.ShiftLeftLogical(src1, ir.Imm32(3));
    SetDst(inst.dst[0], ir.BitFieldInsert(src2, value_uint, offset, ir.Imm32(8)));
}

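// Only the immediate forms of the 64-bit shift are handled: the result is
// computed at translation time and written back as two 32-bit register halves
// (the IR models 64-bit VGPR values as register pairs). A fully dynamic
// 64-bit shift appears not to be needed by shaders seen so far, hence the
// UNREACHABLE fallthrough.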
void Translator::V_LSHL_B64(const GcnInst& inst) {
    const IR::U64 src0{GetSrc64(inst.src[0])};
    const IR::U64 src1{GetSrc64(inst.src[1])};
    const IR::VectorReg dst_reg{inst.dst[0].code};
    if (src0.IsImmediate()) {
        if (src0.U64() == -1) {
            // If src0 is a fixed -1, the result will always be -1.
            ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF));
            ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF));
            return;
        }
        if (src1.IsImmediate()) {
            // If both src0 and src1 are immediates, we can calculate the result now.
            // Note that according to the manual, only bits 4:0 are used from src1.
            const u64 result = src0.U64() << (src1.U64() & 0x1F);
            ir.SetVectorReg(dst_reg, ir.Imm32(static_cast<u32>(result)));
            ir.SetVectorReg(dst_reg + 1, ir.Imm32(static_cast<u32>(result >> 32)));
            return;
        }
    }
    UNREACHABLE_MSG("Unimplemented V_LSHL_B64 arguments");
}

void Translator::V_MUL_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};
    SetDst64(inst.dst[0], ir.FPMul(src0, src1));
}

void Translator::V_MAX_F64(const GcnInst& inst) {
    const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
    const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};
    SetDst64(inst.dst[0], ir.FPMax(src0, src1));
}

void Translator::V_MUL_LO_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IMul(src0, src1));
}

void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)};
    SetDst(inst.dst[0], hi);
}

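// 64-bit multiply-add. Note: ir.IMul is a 32-bit multiply, so packing it with
// a zeroed high word models only the low 32 bits of the 32x32 product;
// products wider than 32 bits would need the IMulExt pair used by
// V_MUL_HI_U32. VCC is set from the same "sum < operand" unsigned-wraparound
// test as V_ADDC_U32, extended to 64 bits.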
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
    const auto src0 = GetSrc<IR::U32>(inst.src[0]);
    const auto src1 = GetSrc<IR::U32>(inst.src[1]);
    const auto src2 = GetSrc64<IR::U64>(inst.src[2]);

    // const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
    const IR::U64 mul_result =
        ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U)));
    const IR::U64 sum_result = ir.IAdd(mul_result, src2);

    SetDst64(inst.dst[0], sum_result);

    const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false);
    const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false);
    const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
    ir.SetVcc(did_overflow);
}

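// Carry plumbing: VOP2 encodings implicitly use VCC for carry-in/out, while
// the VOP3 forms name an arbitrary SGPR pair (src[2] for carry-in, dst[1] for
// carry-out). The helpers below normalize both cases.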
IR::U32 Translator::GetCarryIn(const GcnInst& inst) {
    IR::U1 carry;
    if (inst.src_count == 3) { // VOP3
        if (inst.src[2].field == OperandField::VccLo) {
            carry = ir.GetVcc();
        } else if (inst.src[2].field == OperandField::ScalarGPR) {
            carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
        } else {
            UNREACHABLE();
        }
    } else { // VOP2
        carry = ir.GetVcc();
    }

    return IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
}

void Translator::SetCarryOut(const GcnInst& inst, const IR::U1& carry) {
    if (inst.dst_count == 2) { // VOP3
        if (inst.dst[1].field == OperandField::VccLo) {
            ir.SetVcc(carry);
        } else if (inst.dst[1].field == OperandField::ScalarGPR) {
            ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), carry);
        } else {
            UNREACHABLE();
        }
    } else { // VOP2
        ir.SetVcc(carry);
    }
}

// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to.

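// Without indexable VGPRs in the IR, relative addressing is lowered to a
// select chain over every allocated register, roughly:
//   src = V0;
//   for (i = src_vgprno; i < num_allocated_vgprs; i++)
//       src = (m0 == i - src_vgprno) ? V[i] : src;
// Correct but O(num_allocated_vgprs) per access, which is what the range
// analysis TODO above aims to bound.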
IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) {
    // Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs
    IR::U32 src_val = ir.GetVectorReg<IR::U32>(IR::VectorReg::V0);
    for (u32 i = src_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno));
        src_val =
            IR::U32{ir.Select(cond, ir.GetVectorReg<IR::U32>(IR::VectorReg::V0 + i), src_val)};
    }
    return src_val;
}

void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) {
    for (u32 i = dst_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno));
        const IR::U32 dst_val =
            IR::U32{ir.Select(cond, src_val, ir.GetVectorReg<IR::U32>(IR::VectorReg::V0 + i))};
        ir.SetVectorReg(IR::VectorReg::V0 + i, dst_val);
    }
}

} // namespace Shader::Gcn