diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 634566fa8..f7a178adc 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1479,7 +1479,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 337 = V_MIN3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 338 = V_MIN3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, @@ -1488,7 +1488,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 340 = V_MAX3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 341 = V_MAX3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, @@ -1497,7 +1497,7 @@ constexpr std::array InstructionFormatVOP3 = {{ {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, // 343 = V_MED3_F32 - {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Float32}, // 344 = V_MED3_I32 {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 88db554a9..62f4c1aab 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -85,4 +85,29 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { UNREACHABLE(); } +void Translator::EmitDataShare(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::DS_SWIZZLE_B32: + return DS_SWIZZLE_B32(inst); + case Opcode::DS_READ_B32: + return DS_READ(32, false, false, inst); + case Opcode::DS_READ_B64: + return DS_READ(64, false, false, inst); + case Opcode::DS_READ2_B32: + return DS_READ(32, false, true, inst); + case Opcode::DS_READ2_B64: + return DS_READ(64, false, true, inst); + case Opcode::DS_WRITE_B32: + return DS_WRITE(32, false, false, inst); + case Opcode::DS_WRITE_B64: + return DS_WRITE(64, false, false, inst); + case Opcode::DS_WRITE2_B32: + return DS_WRITE(32, false, true, inst); + case Opcode::DS_WRITE2_B64: + return DS_WRITE(64, false, true, inst); + default: + info.translation_failed = true; + } +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 518405373..889de21b7 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -6,7 +6,7 @@ namespace Shader::Gcn { -void Translator::EXP(const GcnInst& inst) { +void Translator::EmitExport(const GcnInst& inst) { if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) { LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation"); ir.Discard(ir.LogicalNot(ir.GetExec())); diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 076145bb6..40ca859da 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -5,6 +5,88 @@ namespace Shader::Gcn { +void Translator::EmitScalarAlu(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_MOVK_I32: + return S_MOVK(inst); + case Opcode::S_MOV_B32: + return S_MOV(inst); + case Opcode::S_MUL_I32: + return S_MUL_I32(inst); + case Opcode::S_AND_SAVEEXEC_B64: + return S_AND_SAVEEXEC_B64(inst); + case Opcode::S_MOV_B64: + return S_MOV_B64(inst); + case Opcode::S_CMP_LT_U32: + return S_CMP(ConditionOp::LT, false, inst); + case Opcode::S_CMP_LE_U32: + return S_CMP(ConditionOp::LE, false, inst); + case Opcode::S_CMP_LG_U32: + return S_CMP(ConditionOp::LG, false, inst); + case Opcode::S_CMP_LT_I32: + return S_CMP(ConditionOp::LT, true, inst); + case Opcode::S_CMP_LG_I32: + return S_CMP(ConditionOp::LG, true, inst); + case Opcode::S_CMP_GT_I32: + return S_CMP(ConditionOp::GT, true, inst); + case Opcode::S_CMP_GE_I32: + return S_CMP(ConditionOp::GE, true, inst); + case Opcode::S_CMP_EQ_I32: + return S_CMP(ConditionOp::EQ, true, inst); + case Opcode::S_CMP_EQ_U32: + return S_CMP(ConditionOp::EQ, false, inst); + case Opcode::S_OR_B64: + return S_OR_B64(NegateMode::None, false, inst); + case Opcode::S_NOR_B64: + return S_OR_B64(NegateMode::Result, false, inst); + case Opcode::S_XOR_B64: + return S_OR_B64(NegateMode::None, true, inst); + case Opcode::S_ORN2_B64: + return S_OR_B64(NegateMode::Src1, false, inst); + case Opcode::S_AND_B64: + return S_AND_B64(NegateMode::None, inst); + case Opcode::S_NAND_B64: + return S_AND_B64(NegateMode::Result, inst); + case Opcode::S_ANDN2_B64: + return S_AND_B64(NegateMode::Src1, inst); + case Opcode::S_NOT_B64: + return S_NOT_B64(inst); + case Opcode::S_ADD_I32: + return S_ADD_I32(inst); + case Opcode::S_AND_B32: + return S_AND_B32(inst); + case Opcode::S_ASHR_I32: + return S_ASHR_I32(inst); + case Opcode::S_OR_B32: + return S_OR_B32(inst); + case Opcode::S_LSHL_B32: + return S_LSHL_B32(inst); + case Opcode::S_LSHR_B32: + return S_LSHR_B32(inst); + case Opcode::S_CSELECT_B32: + return S_CSELECT_B32(inst); + case Opcode::S_CSELECT_B64: + return S_CSELECT_B64(inst); + case Opcode::S_BFE_U32: + return S_BFE_U32(inst); + case Opcode::S_BFM_B32: + return S_BFM_B32(inst); + case Opcode::S_BREV_B32: + return S_BREV_B32(inst); + case Opcode::S_ADD_U32: + return S_ADD_U32(inst); + case Opcode::S_ADDC_U32: + return S_ADDC_U32(inst); + case Opcode::S_SUB_U32: + case Opcode::S_SUB_I32: + return S_SUB_U32(inst); + case Opcode::S_WQM_B64: + break; + default: + info.translation_failed = true; + } +} + void Translator::S_MOVK(const GcnInst& inst) { const auto simm16 = inst.control.sopk.simm.Value(); if (simm16 & (1 << 15)) { @@ -62,8 +144,6 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { } }(); - // Mark destination SPGR as an EXEC context. This means we will use 1-bit - // IR instruction whenever it's loaded. switch (inst.dst[0].field) { case OperandField::ScalarGPR: ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec); diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 3c80764c4..ceaceb54b 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -49,4 +49,27 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { } } +void Translator::EmitScalarMemory(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_LOAD_DWORDX4: + return S_LOAD_DWORD(4, inst); + case Opcode::S_LOAD_DWORDX8: + return S_LOAD_DWORD(8, inst); + case Opcode::S_LOAD_DWORDX16: + return S_LOAD_DWORD(16, inst); + case Opcode::S_BUFFER_LOAD_DWORD: + return S_BUFFER_LOAD_DWORD(1, inst); + case Opcode::S_BUFFER_LOAD_DWORDX2: + return S_BUFFER_LOAD_DWORD(2, inst); + case Opcode::S_BUFFER_LOAD_DWORDX4: + return S_BUFFER_LOAD_DWORD(4, inst); + case Opcode::S_BUFFER_LOAD_DWORDX8: + return S_BUFFER_LOAD_DWORD(8, inst); + case Opcode::S_BUFFER_LOAD_DWORDX16: + return S_BUFFER_LOAD_DWORD(16, inst); + default: + info.translation_failed = true; + } +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index edd592697..7577a8642 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -455,733 +455,79 @@ void Translator::EmitFetch(const GcnInst& inst) { } } -void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info) { +void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_BARRIER: + return S_BARRIER(); + case Opcode::S_TTRACEDATA: + LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); + return; + case Opcode::S_GETPC_B64: + return S_GETPC_B64(pc, inst); + case Opcode::S_WAITCNT: + case Opcode::S_NOP: + case Opcode::S_ENDPGM: + case Opcode::S_CBRANCH_EXECZ: + case Opcode::S_CBRANCH_SCC0: + case Opcode::S_CBRANCH_SCC1: + case Opcode::S_CBRANCH_VCCNZ: + case Opcode::S_CBRANCH_VCCZ: + case Opcode::S_BRANCH: + return; + default: + UNREACHABLE(); + } +} + +void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info) { if (inst_list.empty()) { return; } Translator translator{block, info}; for (const auto& inst : inst_list) { - block_base += inst.length; - switch (inst.opcode) { - case Opcode::S_MOVK_I32: - translator.S_MOVK(inst); - break; - case Opcode::S_MOV_B32: - translator.S_MOV(inst); - break; - case Opcode::S_MUL_I32: - translator.S_MUL_I32(inst); - break; - case Opcode::V_MAD_F32: - translator.V_MAD_F32(inst); - break; - case Opcode::V_MOV_B32: - translator.V_MOV(inst); - break; - case Opcode::V_MAC_F32: - translator.V_MAC_F32(inst); - break; - case Opcode::V_MUL_F32: - translator.V_MUL_F32(inst); - break; - case Opcode::V_AND_B32: - translator.V_AND_B32(inst); - break; - case Opcode::V_OR_B32: - translator.V_OR_B32(false, inst); - break; - case Opcode::V_XOR_B32: - translator.V_OR_B32(true, inst); - break; - case Opcode::V_LSHLREV_B32: - translator.V_LSHLREV_B32(inst); - break; - case Opcode::V_LSHL_B32: - translator.V_LSHL_B32(inst); - break; - case Opcode::V_FFBL_B32: - translator.V_FFBL_B32(inst); - break; - case Opcode::V_ADD_I32: - translator.V_ADD_I32(inst); - break; - case Opcode::V_ADDC_U32: - translator.V_ADDC_U32(inst); - break; - case Opcode::V_CVT_F32_I32: - translator.V_CVT_F32_I32(inst); - break; - case Opcode::V_CVT_F32_U32: - translator.V_CVT_F32_U32(inst); - break; - case Opcode::V_RCP_F32: - translator.V_RCP_F32(inst); - break; - case Opcode::S_SWAPPC_B64: + pc += inst.length; + + // Special case for emitting fetch shader. + if (inst.opcode == Opcode::S_SWAPPC_B64) { ASSERT(info.stage == Stage::Vertex); translator.EmitFetch(inst); - break; - case Opcode::S_WAITCNT: - break; - case Opcode::S_LOAD_DWORDX4: - translator.S_LOAD_DWORD(4, inst); - break; - case Opcode::S_LOAD_DWORDX8: - translator.S_LOAD_DWORD(8, inst); - break; - case Opcode::S_LOAD_DWORDX16: - translator.S_LOAD_DWORD(16, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORD: - translator.S_BUFFER_LOAD_DWORD(1, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX2: - translator.S_BUFFER_LOAD_DWORD(2, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX4: - translator.S_BUFFER_LOAD_DWORD(4, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX8: - translator.S_BUFFER_LOAD_DWORD(8, inst); - break; - case Opcode::S_BUFFER_LOAD_DWORDX16: - translator.S_BUFFER_LOAD_DWORD(16, inst); - break; - case Opcode::EXP: - translator.EXP(inst); - break; - case Opcode::V_INTERP_P2_F32: - translator.V_INTERP_P2_F32(inst); - break; - case Opcode::V_CVT_PKRTZ_F16_F32: - translator.V_CVT_PKRTZ_F16_F32(inst); - break; - case Opcode::V_CVT_F32_F16: - translator.V_CVT_F32_F16(inst); - break; - case Opcode::V_CVT_F16_F32: - translator.V_CVT_F16_F32(inst); - break; - case Opcode::V_CVT_F32_UBYTE0: - translator.V_CVT_F32_UBYTE(0, inst); - break; - case Opcode::V_CVT_F32_UBYTE1: - translator.V_CVT_F32_UBYTE(1, inst); - break; - case Opcode::V_CVT_F32_UBYTE2: - translator.V_CVT_F32_UBYTE(2, inst); - break; - case Opcode::V_CVT_F32_UBYTE3: - translator.V_CVT_F32_UBYTE(3, inst); - break; - case Opcode::V_BFREV_B32: - translator.V_BFREV_B32(inst); - break; - case Opcode::V_LDEXP_F32: - translator.V_LDEXP_F32(inst); - break; - case Opcode::V_FRACT_F32: - translator.V_FRACT_F32(inst); - break; - case Opcode::V_ADD_F32: - translator.V_ADD_F32(inst); - break; - case Opcode::V_CVT_OFF_F32_I4: - translator.V_CVT_OFF_F32_I4(inst); - break; - case Opcode::V_MED3_F32: - translator.V_MED3_F32(inst); - break; - case Opcode::V_FLOOR_F32: - translator.V_FLOOR_F32(inst); - break; - case Opcode::V_SUB_F32: - translator.V_SUB_F32(inst); - break; - case Opcode::V_FMA_F32: - case Opcode::V_MADAK_F32: // Yes these can share the opcode - translator.V_FMA_F32(inst); - break; - case Opcode::IMAGE_SAMPLE_LZ_O: - case Opcode::IMAGE_SAMPLE_O: - case Opcode::IMAGE_SAMPLE_C: - case Opcode::IMAGE_SAMPLE_C_LZ: - case Opcode::IMAGE_SAMPLE_LZ: - case Opcode::IMAGE_SAMPLE: - case Opcode::IMAGE_SAMPLE_L: - case Opcode::IMAGE_SAMPLE_C_O: - case Opcode::IMAGE_SAMPLE_B: - case Opcode::IMAGE_SAMPLE_C_LZ_O: - translator.IMAGE_SAMPLE(inst); - break; - case Opcode::IMAGE_ATOMIC_ADD: - translator.IMAGE_ATOMIC(AtomicOp::Add, inst); - break; - case Opcode::IMAGE_ATOMIC_AND: - translator.IMAGE_ATOMIC(AtomicOp::And, inst); - break; - case Opcode::IMAGE_ATOMIC_OR: - translator.IMAGE_ATOMIC(AtomicOp::Or, inst); - break; - case Opcode::IMAGE_ATOMIC_XOR: - translator.IMAGE_ATOMIC(AtomicOp::Xor, inst); - break; - case Opcode::IMAGE_ATOMIC_UMAX: - translator.IMAGE_ATOMIC(AtomicOp::Umax, inst); - break; - case Opcode::IMAGE_ATOMIC_SMAX: - translator.IMAGE_ATOMIC(AtomicOp::Smax, inst); - break; - case Opcode::IMAGE_ATOMIC_UMIN: - translator.IMAGE_ATOMIC(AtomicOp::Umin, inst); - break; - case Opcode::IMAGE_ATOMIC_SMIN: - translator.IMAGE_ATOMIC(AtomicOp::Smin, inst); - break; - case Opcode::IMAGE_ATOMIC_INC: - translator.IMAGE_ATOMIC(AtomicOp::Inc, inst); - break; - case Opcode::IMAGE_ATOMIC_DEC: - translator.IMAGE_ATOMIC(AtomicOp::Dec, inst); - break; - case Opcode::IMAGE_GET_LOD: - translator.IMAGE_GET_LOD(inst); - break; - case Opcode::IMAGE_GATHER4_C: - case Opcode::IMAGE_GATHER4_LZ: - case Opcode::IMAGE_GATHER4_LZ_O: - translator.IMAGE_GATHER(inst); - break; - case Opcode::IMAGE_STORE: - translator.IMAGE_STORE(inst); - break; - case Opcode::IMAGE_LOAD_MIP: - translator.IMAGE_LOAD(true, inst); - break; - case Opcode::IMAGE_LOAD: - translator.IMAGE_LOAD(false, inst); - break; - case Opcode::V_MAD_U64_U32: - translator.V_MAD_U64_U32(inst); - break; - case Opcode::V_CMP_GE_I32: - translator.V_CMP_U32(ConditionOp::GE, true, false, inst); - break; - case Opcode::V_CMP_EQ_I32: - translator.V_CMP_U32(ConditionOp::EQ, true, false, inst); - break; - case Opcode::V_CMP_LE_I32: - translator.V_CMP_U32(ConditionOp::LE, true, false, inst); - break; - case Opcode::V_CMP_NE_I32: - translator.V_CMP_U32(ConditionOp::LG, true, false, inst); - break; - case Opcode::V_CMP_NE_U32: - translator.V_CMP_U32(ConditionOp::LG, false, false, inst); - break; - case Opcode::V_CMP_EQ_U32: - translator.V_CMP_U32(ConditionOp::EQ, false, false, inst); - break; - case Opcode::V_CMP_F_U32: - translator.V_CMP_U32(ConditionOp::F, false, false, inst); - break; - case Opcode::V_CMP_LT_U32: - translator.V_CMP_U32(ConditionOp::LT, false, false, inst); - break; - case Opcode::V_CMP_GT_U32: - translator.V_CMP_U32(ConditionOp::GT, false, false, inst); - break; - case Opcode::V_CMP_GE_U32: - translator.V_CMP_U32(ConditionOp::GE, false, false, inst); - break; - case Opcode::V_CMP_TRU_U32: - translator.V_CMP_U32(ConditionOp::TRU, false, false, inst); - break; - case Opcode::V_CMP_NEQ_F32: - translator.V_CMP_F32(ConditionOp::LG, false, inst); - break; - case Opcode::V_CMP_F_F32: - translator.V_CMP_F32(ConditionOp::F, false, inst); - break; - case Opcode::V_CMP_LT_F32: - translator.V_CMP_F32(ConditionOp::LT, false, inst); - break; - case Opcode::V_CMP_EQ_F32: - translator.V_CMP_F32(ConditionOp::EQ, false, inst); - break; - case Opcode::V_CMP_LE_F32: - translator.V_CMP_F32(ConditionOp::LE, false, inst); - break; - case Opcode::V_CMP_GT_F32: - translator.V_CMP_F32(ConditionOp::GT, false, inst); - break; - case Opcode::V_CMP_LG_F32: - translator.V_CMP_F32(ConditionOp::LG, false, inst); - break; - case Opcode::V_CMP_GE_F32: - translator.V_CMP_F32(ConditionOp::GE, false, inst); - break; - case Opcode::V_CMP_NLE_F32: - translator.V_CMP_F32(ConditionOp::GT, false, inst); - break; - case Opcode::V_CMP_NLT_F32: - translator.V_CMP_F32(ConditionOp::GE, false, inst); - break; - case Opcode::V_CMP_NGT_F32: - translator.V_CMP_F32(ConditionOp::LE, false, inst); - break; - case Opcode::V_CMP_NGE_F32: - translator.V_CMP_F32(ConditionOp::LT, false, inst); - break; - case Opcode::S_CMP_LT_U32: - translator.S_CMP(ConditionOp::LT, false, inst); - break; - case Opcode::S_CMP_LE_U32: - translator.S_CMP(ConditionOp::LE, false, inst); - break; - case Opcode::S_CMP_LG_U32: - translator.S_CMP(ConditionOp::LG, false, inst); - break; - case Opcode::S_CMP_LT_I32: - translator.S_CMP(ConditionOp::LT, true, inst); - break; - case Opcode::S_CMP_LG_I32: - translator.S_CMP(ConditionOp::LG, true, inst); - break; - case Opcode::S_CMP_GT_I32: - translator.S_CMP(ConditionOp::GT, true, inst); - break; - case Opcode::S_CMP_GE_I32: - translator.S_CMP(ConditionOp::GE, true, inst); - break; - case Opcode::S_CMP_EQ_I32: - translator.S_CMP(ConditionOp::EQ, true, inst); - break; - case Opcode::S_CMP_EQ_U32: - translator.S_CMP(ConditionOp::EQ, false, inst); - break; - case Opcode::S_LSHL_B32: - translator.S_LSHL_B32(inst); - break; - case Opcode::V_CNDMASK_B32: - translator.V_CNDMASK_B32(inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_X: - translator.BUFFER_LOAD_FORMAT(1, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XY: - translator.BUFFER_LOAD_FORMAT(2, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XYZ: - translator.BUFFER_LOAD_FORMAT(3, true, true, inst); - break; - case Opcode::TBUFFER_LOAD_FORMAT_XYZW: - translator.BUFFER_LOAD_FORMAT(4, true, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_X: - translator.BUFFER_LOAD_FORMAT(1, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XY: - translator.BUFFER_LOAD_FORMAT(2, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XYZ: - translator.BUFFER_LOAD_FORMAT(3, false, true, inst); - break; - case Opcode::BUFFER_LOAD_FORMAT_XYZW: - translator.BUFFER_LOAD_FORMAT(4, false, true, inst); - break; - case Opcode::BUFFER_LOAD_DWORD: - translator.BUFFER_LOAD_FORMAT(1, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX2: - translator.BUFFER_LOAD_FORMAT(2, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX3: - translator.BUFFER_LOAD_FORMAT(3, false, false, inst); - break; - case Opcode::BUFFER_LOAD_DWORDX4: - translator.BUFFER_LOAD_FORMAT(4, false, false, inst); - break; - case Opcode::BUFFER_STORE_FORMAT_X: - case Opcode::BUFFER_STORE_DWORD: - translator.BUFFER_STORE_FORMAT(1, false, inst); - break; - case Opcode::BUFFER_STORE_DWORDX2: - translator.BUFFER_STORE_FORMAT(2, false, inst); - break; - case Opcode::BUFFER_STORE_DWORDX3: - translator.BUFFER_STORE_FORMAT(3, false, inst); - break; - case Opcode::BUFFER_STORE_FORMAT_XYZW: - case Opcode::BUFFER_STORE_DWORDX4: - translator.BUFFER_STORE_FORMAT(4, false, inst); - break; - case Opcode::V_MAX_F32: - translator.V_MAX_F32(inst); - break; - case Opcode::V_MAX_I32: - translator.V_MAX_U32(true, inst); - break; - case Opcode::V_MAX_U32: - translator.V_MAX_U32(false, inst); - break; - case Opcode::V_NOT_B32: - translator.V_NOT_B32(inst); - break; - case Opcode::V_RSQ_F32: - translator.V_RSQ_F32(inst); - break; - case Opcode::S_ANDN2_B64: - translator.S_AND_B64(NegateMode::Src1, inst); - break; - case Opcode::S_ORN2_B64: - translator.S_OR_B64(NegateMode::Src1, false, inst); - break; - case Opcode::V_SIN_F32: - translator.V_SIN_F32(inst); - break; - case Opcode::V_COS_F32: - translator.V_COS_F32(inst); - break; - case Opcode::V_LOG_F32: - translator.V_LOG_F32(inst); - break; - case Opcode::V_EXP_F32: - translator.V_EXP_F32(inst); - break; - case Opcode::V_SQRT_F32: - translator.V_SQRT_F32(inst); - break; - case Opcode::V_MIN_F32: - translator.V_MIN_F32(inst); - break; - case Opcode::V_MIN_I32: - translator.V_MIN_I32(inst); - break; - case Opcode::V_MIN3_F32: - translator.V_MIN3_F32(inst); - break; - case Opcode::V_MIN_LEGACY_F32: - translator.V_MIN_F32(inst, true); - break; - case Opcode::V_MADMK_F32: - translator.V_MADMK_F32(inst); - break; - case Opcode::V_CUBEMA_F32: - translator.V_CUBEMA_F32(inst); - break; - case Opcode::V_CUBESC_F32: - translator.V_CUBESC_F32(inst); - break; - case Opcode::V_CUBETC_F32: - translator.V_CUBETC_F32(inst); - break; - case Opcode::V_CUBEID_F32: - translator.V_CUBEID_F32(inst); - break; - case Opcode::V_CVT_U32_F32: - translator.V_CVT_U32_F32(inst); - break; - case Opcode::V_CVT_I32_F32: - translator.V_CVT_I32_F32(inst); - break; - case Opcode::V_CVT_FLR_I32_F32: - translator.V_CVT_FLR_I32_F32(inst); - break; - case Opcode::V_SUBREV_F32: - translator.V_SUBREV_F32(inst); - break; - case Opcode::S_AND_SAVEEXEC_B64: - translator.S_AND_SAVEEXEC_B64(inst); - break; - case Opcode::S_MOV_B64: - translator.S_MOV_B64(inst); - break; - case Opcode::V_SUBREV_I32: - translator.V_SUBREV_I32(inst); - break; + continue; + } - case Opcode::V_CMPX_F_F32: - translator.V_CMP_F32(ConditionOp::F, true, inst); + // Emit instructions for each category. + switch (inst.category) { + case InstCategory::DataShare: + translator.EmitDataShare(inst); break; - case Opcode::V_CMPX_LT_F32: - translator.V_CMP_F32(ConditionOp::LT, true, inst); + case InstCategory::VectorInterpolation: + translator.EmitVectorInterpolation(inst); break; - case Opcode::V_CMPX_EQ_F32: - translator.V_CMP_F32(ConditionOp::EQ, true, inst); + case InstCategory::ScalarMemory: + translator.EmitScalarMemory(inst); break; - case Opcode::V_CMPX_LE_F32: - translator.V_CMP_F32(ConditionOp::LE, true, inst); + case InstCategory::VectorMemory: + translator.EmitVectorMemory(inst); break; - case Opcode::V_CMPX_GT_F32: - translator.V_CMP_F32(ConditionOp::GT, true, inst); + case InstCategory::Export: + translator.EmitExport(inst); break; - case Opcode::V_CMPX_LG_F32: - translator.V_CMP_F32(ConditionOp::LG, true, inst); + case InstCategory::FlowControl: + translator.EmitFlowControl(pc, inst); break; - case Opcode::V_CMPX_GE_F32: - translator.V_CMP_F32(ConditionOp::GE, true, inst); + case InstCategory::ScalarALU: + translator.EmitScalarAlu(inst); break; - case Opcode::V_CMPX_NGE_F32: - translator.V_CMP_F32(ConditionOp::LT, true, inst); - break; - case Opcode::V_CMPX_NLG_F32: - translator.V_CMP_F32(ConditionOp::EQ, true, inst); - break; - case Opcode::V_CMPX_NGT_F32: - translator.V_CMP_F32(ConditionOp::LE, true, inst); - break; - case Opcode::V_CMPX_NLE_F32: - translator.V_CMP_F32(ConditionOp::GT, true, inst); - break; - case Opcode::V_CMPX_NEQ_F32: - translator.V_CMP_F32(ConditionOp::LG, true, inst); - break; - case Opcode::V_CMPX_NLT_F32: - translator.V_CMP_F32(ConditionOp::GE, true, inst); - break; - case Opcode::V_CMPX_TRU_F32: - translator.V_CMP_F32(ConditionOp::TRU, true, inst); - break; - case Opcode::V_CMP_LE_U32: - translator.V_CMP_U32(ConditionOp::LE, false, false, inst); - break; - case Opcode::V_CMP_GT_I32: - translator.V_CMP_U32(ConditionOp::GT, true, false, inst); - break; - case Opcode::V_CMP_LT_I32: - translator.V_CMP_U32(ConditionOp::LT, true, false, inst); - break; - case Opcode::V_CMPX_LT_I32: - translator.V_CMP_U32(ConditionOp::LT, true, true, inst); - break; - case Opcode::V_CMPX_F_U32: - translator.V_CMP_U32(ConditionOp::F, false, true, inst); - break; - case Opcode::V_CMPX_LT_U32: - translator.V_CMP_U32(ConditionOp::LT, false, true, inst); - break; - case Opcode::V_CMPX_EQ_U32: - translator.V_CMP_U32(ConditionOp::EQ, false, true, inst); - break; - case Opcode::V_CMPX_LE_U32: - translator.V_CMP_U32(ConditionOp::LE, false, true, inst); - break; - case Opcode::V_CMPX_GT_U32: - translator.V_CMP_U32(ConditionOp::GT, false, true, inst); - break; - case Opcode::V_CMPX_NE_U32: - translator.V_CMP_U32(ConditionOp::LG, false, true, inst); - break; - case Opcode::V_CMPX_GE_U32: - translator.V_CMP_U32(ConditionOp::GE, false, true, inst); - break; - case Opcode::V_CMPX_TRU_U32: - translator.V_CMP_U32(ConditionOp::TRU, false, true, inst); - break; - case Opcode::S_OR_B64: - translator.S_OR_B64(NegateMode::None, false, inst); - break; - case Opcode::S_NOR_B64: - translator.S_OR_B64(NegateMode::Result, false, inst); - break; - case Opcode::S_XOR_B64: - translator.S_OR_B64(NegateMode::None, true, inst); - break; - case Opcode::S_AND_B64: - translator.S_AND_B64(NegateMode::None, inst); - break; - case Opcode::S_NOT_B64: - translator.S_NOT_B64(inst); - break; - case Opcode::S_NAND_B64: - translator.S_AND_B64(NegateMode::Result, inst); - break; - case Opcode::V_LSHRREV_B32: - translator.V_LSHRREV_B32(inst); - break; - case Opcode::S_ADD_I32: - translator.S_ADD_I32(inst); - break; - case Opcode::V_MUL_HI_U32: - translator.V_MUL_HI_U32(false, inst); - break; - case Opcode::V_MUL_LO_I32: - translator.V_MUL_LO_U32(inst); - break; - case Opcode::V_SAD_U32: - translator.V_SAD_U32(inst); - break; - case Opcode::V_BFE_U32: - translator.V_BFE_U32(false, inst); - break; - case Opcode::V_BFE_I32: - translator.V_BFE_U32(true, inst); - break; - case Opcode::V_MAD_I32_I24: - translator.V_MAD_I32_I24(inst); - break; - case Opcode::V_MUL_I32_I24: - case Opcode::V_MUL_U32_U24: - translator.V_MUL_I32_I24(inst); - break; - case Opcode::V_SUB_I32: - translator.V_SUB_I32(inst); - break; - case Opcode::V_LSHR_B32: - translator.V_LSHR_B32(inst); - break; - case Opcode::V_ASHRREV_I32: - translator.V_ASHRREV_I32(inst); - break; - case Opcode::V_MAD_U32_U24: - translator.V_MAD_U32_U24(inst); - break; - case Opcode::S_AND_B32: - translator.S_AND_B32(inst); - break; - case Opcode::S_ASHR_I32: - translator.S_ASHR_I32(inst); - break; - case Opcode::S_OR_B32: - translator.S_OR_B32(inst); - break; - case Opcode::S_LSHR_B32: - translator.S_LSHR_B32(inst); - break; - case Opcode::S_CSELECT_B32: - translator.S_CSELECT_B32(inst); - break; - case Opcode::S_CSELECT_B64: - translator.S_CSELECT_B64(inst); - break; - case Opcode::S_BFE_U32: - translator.S_BFE_U32(inst); - break; - case Opcode::V_RNDNE_F32: - translator.V_RNDNE_F32(inst); - break; - case Opcode::V_BCNT_U32_B32: - translator.V_BCNT_U32_B32(inst); - break; - case Opcode::V_MAX3_F32: - translator.V_MAX3_F32(inst); - break; - case Opcode::DS_SWIZZLE_B32: - translator.DS_SWIZZLE_B32(inst); - break; - case Opcode::V_MUL_LO_U32: - translator.V_MUL_LO_U32(inst); - break; - case Opcode::S_BFM_B32: - translator.S_BFM_B32(inst); - break; - case Opcode::V_MIN_U32: - translator.V_MIN_U32(inst); - break; - case Opcode::V_CMP_NE_U64: - translator.V_CMP_NE_U64(inst); - break; - case Opcode::V_CMP_CLASS_F32: - translator.V_CMP_CLASS_F32(inst); - break; - case Opcode::V_TRUNC_F32: - translator.V_TRUNC_F32(inst); - break; - case Opcode::V_CEIL_F32: - translator.V_CEIL_F32(inst); - break; - case Opcode::V_BFI_B32: - translator.V_BFI_B32(inst); - break; - case Opcode::S_BREV_B32: - translator.S_BREV_B32(inst); - break; - case Opcode::S_ADD_U32: - translator.S_ADD_U32(inst); - break; - case Opcode::S_ADDC_U32: - translator.S_ADDC_U32(inst); - break; - case Opcode::S_SUB_U32: - case Opcode::S_SUB_I32: - translator.S_SUB_U32(inst); - break; - // TODO: Separate implementation for legacy variants. - case Opcode::V_MUL_LEGACY_F32: - translator.V_MUL_F32(inst); - break; - case Opcode::V_MAC_LEGACY_F32: - translator.V_MAC_F32(inst); - break; - case Opcode::V_MAD_LEGACY_F32: - translator.V_MAD_F32(inst); - break; - case Opcode::V_MAX_LEGACY_F32: - translator.V_MAX_F32(inst, true); - break; - case Opcode::V_RSQ_LEGACY_F32: - case Opcode::V_RSQ_CLAMP_F32: - translator.V_RSQ_F32(inst); - break; - case Opcode::V_RCP_IFLAG_F32: - translator.V_RCP_F32(inst); - break; - case Opcode::IMAGE_GET_RESINFO: - translator.IMAGE_GET_RESINFO(inst); - break; - case Opcode::S_BARRIER: - translator.S_BARRIER(); - break; - case Opcode::S_TTRACEDATA: - LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); - break; - case Opcode::DS_READ_B32: - translator.DS_READ(32, false, false, inst); - break; - case Opcode::DS_READ_B64: - translator.DS_READ(64, false, false, inst); - break; - case Opcode::DS_READ2_B32: - translator.DS_READ(32, false, true, inst); - break; - case Opcode::DS_READ2_B64: - translator.DS_READ(64, false, true, inst); - break; - case Opcode::DS_WRITE_B32: - translator.DS_WRITE(32, false, false, inst); - break; - case Opcode::DS_WRITE_B64: - translator.DS_WRITE(64, false, false, inst); - break; - case Opcode::DS_WRITE2_B32: - translator.DS_WRITE(32, false, true, inst); - break; - case Opcode::DS_WRITE2_B64: - translator.DS_WRITE(64, false, true, inst); - break; - case Opcode::V_READFIRSTLANE_B32: - translator.V_READFIRSTLANE_B32(inst); - break; - case Opcode::S_GETPC_B64: - translator.S_GETPC_B64(block_base, inst); - break; - case Opcode::S_NOP: - case Opcode::S_CBRANCH_EXECZ: - case Opcode::S_CBRANCH_SCC0: - case Opcode::S_CBRANCH_SCC1: - case Opcode::S_CBRANCH_VCCNZ: - case Opcode::S_CBRANCH_VCCZ: - case Opcode::S_BRANCH: - case Opcode::S_WQM_B64: - case Opcode::V_INTERP_P1_F32: - case Opcode::S_ENDPGM: + case InstCategory::VectorALU: + translator.EmitVectorAlu(inst); break; default: + UNREACHABLE(); + } + + if (info.translation_failed) { const u32 opcode = u32(inst.opcode); LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})", - magic_enum::enum_name(inst.opcode), opcode); - info.translation_failed = true; + magic_enum::enum_name(inst.opcode), u32(inst.opcode)); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 365e43a45..9388f55de 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -55,9 +55,17 @@ class Translator { public: explicit Translator(IR::Block* block_, Info& info); + // Instruction categories void EmitPrologue(); void EmitFetch(const GcnInst& inst); void EmitDataShare(const GcnInst& inst); + void EmitVectorInterpolation(const GcnInst& inst); + void EmitScalarMemory(const GcnInst& inst); + void EmitVectorMemory(const GcnInst& inst); + void EmitExport(const GcnInst& inst); + void EmitFlowControl(u32 pc, const GcnInst& inst); + void EmitScalarAlu(const GcnInst& inst); + void EmitVectorAlu(const GcnInst& inst); // Scalar ALU void S_MOVK(const GcnInst& inst); @@ -188,9 +196,6 @@ public: void IMAGE_GET_LOD(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); - // Export - void EXP(const GcnInst& inst); - private: template [[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 362390325..5d92289e7 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -5,6 +5,290 @@ namespace Shader::Gcn { +void Translator::EmitVectorAlu(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::V_LSHLREV_B32: + return V_LSHLREV_B32(inst); + case Opcode::V_LSHL_B32: + return V_LSHL_B32(inst); + case Opcode::V_BFREV_B32: + return V_BFREV_B32(inst); + case Opcode::V_BFE_U32: + return V_BFE_U32(false, inst); + case Opcode::V_BFE_I32: + return V_BFE_U32(true, inst); + case Opcode::V_BFI_B32: + return V_BFI_B32(inst); + case Opcode::V_LSHR_B32: + return V_LSHR_B32(inst); + case Opcode::V_ASHRREV_I32: + return V_ASHRREV_I32(inst); + case Opcode::V_LSHRREV_B32: + return V_LSHRREV_B32(inst); + case Opcode::V_NOT_B32: + return V_NOT_B32(inst); + case Opcode::V_AND_B32: + return V_AND_B32(inst); + case Opcode::V_OR_B32: + return V_OR_B32(false, inst); + case Opcode::V_XOR_B32: + return V_OR_B32(true, inst); + case Opcode::V_FFBL_B32: + return V_FFBL_B32(inst); + + case Opcode::V_MOV_B32: + return V_MOV(inst); + case Opcode::V_ADD_I32: + return V_ADD_I32(inst); + case Opcode::V_ADDC_U32: + return V_ADDC_U32(inst); + case Opcode::V_CVT_F32_I32: + return V_CVT_F32_I32(inst); + case Opcode::V_CVT_F32_U32: + return V_CVT_F32_U32(inst); + case Opcode::V_CVT_PKRTZ_F16_F32: + return V_CVT_PKRTZ_F16_F32(inst); + case Opcode::V_CVT_F32_F16: + return V_CVT_F32_F16(inst); + case Opcode::V_CVT_F16_F32: + return V_CVT_F16_F32(inst); + case Opcode::V_CVT_F32_UBYTE0: + return V_CVT_F32_UBYTE(0, inst); + case Opcode::V_CVT_F32_UBYTE1: + return V_CVT_F32_UBYTE(1, inst); + case Opcode::V_CVT_F32_UBYTE2: + return V_CVT_F32_UBYTE(2, inst); + case Opcode::V_CVT_F32_UBYTE3: + return V_CVT_F32_UBYTE(3, inst); + case Opcode::V_CVT_OFF_F32_I4: + return V_CVT_OFF_F32_I4(inst); + case Opcode::V_MAD_U64_U32: + return V_MAD_U64_U32(inst); + case Opcode::V_CMP_GE_I32: + return V_CMP_U32(ConditionOp::GE, true, false, inst); + case Opcode::V_CMP_EQ_I32: + return V_CMP_U32(ConditionOp::EQ, true, false, inst); + case Opcode::V_CMP_LE_I32: + return V_CMP_U32(ConditionOp::LE, true, false, inst); + case Opcode::V_CMP_NE_I32: + return V_CMP_U32(ConditionOp::LG, true, false, inst); + case Opcode::V_CMP_NE_U32: + return V_CMP_U32(ConditionOp::LG, false, false, inst); + case Opcode::V_CMP_EQ_U32: + return V_CMP_U32(ConditionOp::EQ, false, false, inst); + case Opcode::V_CMP_F_U32: + return V_CMP_U32(ConditionOp::F, false, false, inst); + case Opcode::V_CMP_LT_U32: + return V_CMP_U32(ConditionOp::LT, false, false, inst); + case Opcode::V_CMP_GT_U32: + return V_CMP_U32(ConditionOp::GT, false, false, inst); + case Opcode::V_CMP_GE_U32: + return V_CMP_U32(ConditionOp::GE, false, false, inst); + case Opcode::V_CMP_TRU_U32: + return V_CMP_U32(ConditionOp::TRU, false, false, inst); + case Opcode::V_CMP_NEQ_F32: + return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_CMP_F_F32: + return V_CMP_F32(ConditionOp::F, false, inst); + case Opcode::V_CMP_LT_F32: + return V_CMP_F32(ConditionOp::LT, false, inst); + case Opcode::V_CMP_EQ_F32: + return V_CMP_F32(ConditionOp::EQ, false, inst); + case Opcode::V_CMP_LE_F32: + return V_CMP_F32(ConditionOp::LE, false, inst); + case Opcode::V_CMP_GT_F32: + return V_CMP_F32(ConditionOp::GT, false, inst); + case Opcode::V_CMP_LG_F32: + return V_CMP_F32(ConditionOp::LG, false, inst); + case Opcode::V_CMP_GE_F32: + return V_CMP_F32(ConditionOp::GE, false, inst); + case Opcode::V_CMP_NLE_F32: + return V_CMP_F32(ConditionOp::GT, false, inst); + case Opcode::V_CMP_NLT_F32: + return V_CMP_F32(ConditionOp::GE, false, inst); + case Opcode::V_CMP_NGT_F32: + return V_CMP_F32(ConditionOp::LE, false, inst); + case Opcode::V_CMP_NGE_F32: + return V_CMP_F32(ConditionOp::LT, false, inst); + case Opcode::V_CNDMASK_B32: + return V_CNDMASK_B32(inst); + case Opcode::V_MAX_I32: + return V_MAX_U32(true, inst); + case Opcode::V_MAX_U32: + return V_MAX_U32(false, inst); + case Opcode::V_MIN_I32: + return V_MIN_I32(inst); + case Opcode::V_CUBEMA_F32: + return V_CUBEMA_F32(inst); + case Opcode::V_CUBESC_F32: + return V_CUBESC_F32(inst); + case Opcode::V_CUBETC_F32: + return V_CUBETC_F32(inst); + case Opcode::V_CUBEID_F32: + return V_CUBEID_F32(inst); + case Opcode::V_CVT_U32_F32: + return V_CVT_U32_F32(inst); + case Opcode::V_CVT_I32_F32: + return V_CVT_I32_F32(inst); + case Opcode::V_CVT_FLR_I32_F32: + return V_CVT_FLR_I32_F32(inst); + case Opcode::V_SUBREV_I32: + return V_SUBREV_I32(inst); + case Opcode::V_MUL_HI_U32: + return V_MUL_HI_U32(false, inst); + case Opcode::V_MUL_LO_I32: + return V_MUL_LO_U32(inst); + case Opcode::V_SAD_U32: + return V_SAD_U32(inst); + case Opcode::V_SUB_I32: + return V_SUB_I32(inst); + case Opcode::V_MAD_I32_I24: + return V_MAD_I32_I24(inst); + case Opcode::V_MUL_I32_I24: + case Opcode::V_MUL_U32_U24: + return V_MUL_I32_I24(inst); + case Opcode::V_MAD_U32_U24: + return V_MAD_U32_U24(inst); + case Opcode::V_BCNT_U32_B32: + return V_BCNT_U32_B32(inst); + case Opcode::V_MUL_LO_U32: + return V_MUL_LO_U32(inst); + case Opcode::V_MIN_U32: + return V_MIN_U32(inst); + case Opcode::V_CMP_NE_U64: + return V_CMP_NE_U64(inst); + case Opcode::V_READFIRSTLANE_B32: + return V_READFIRSTLANE_B32(inst); + + case Opcode::V_MAD_F32: + return V_MAD_F32(inst); + case Opcode::V_MAC_F32: + return V_MAC_F32(inst); + case Opcode::V_MUL_F32: + return V_MUL_F32(inst); + case Opcode::V_RCP_F32: + return V_RCP_F32(inst); + case Opcode::V_LDEXP_F32: + return V_LDEXP_F32(inst); + case Opcode::V_FRACT_F32: + return V_FRACT_F32(inst); + case Opcode::V_ADD_F32: + return V_ADD_F32(inst); + case Opcode::V_MED3_F32: + return V_MED3_F32(inst); + case Opcode::V_FLOOR_F32: + return V_FLOOR_F32(inst); + case Opcode::V_SUB_F32: + return V_SUB_F32(inst); + case Opcode::V_FMA_F32: + case Opcode::V_MADAK_F32: + return V_FMA_F32(inst); + case Opcode::V_MAX_F32: + return V_MAX_F32(inst); + case Opcode::V_RSQ_F32: + return V_RSQ_F32(inst); + case Opcode::V_SIN_F32: + return V_SIN_F32(inst); + case Opcode::V_COS_F32: + return V_COS_F32(inst); + case Opcode::V_LOG_F32: + return V_LOG_F32(inst); + case Opcode::V_EXP_F32: + return V_EXP_F32(inst); + case Opcode::V_SQRT_F32: + return V_SQRT_F32(inst); + case Opcode::V_MIN_F32: + return V_MIN_F32(inst, false); + case Opcode::V_MIN3_F32: + return V_MIN3_F32(inst); + case Opcode::V_MIN_LEGACY_F32: + return V_MIN_F32(inst, true); + case Opcode::V_MADMK_F32: + return V_MADMK_F32(inst); + case Opcode::V_SUBREV_F32: + return V_SUBREV_F32(inst); + case Opcode::V_RNDNE_F32: + return V_RNDNE_F32(inst); + case Opcode::V_MAX3_F32: + return V_MAX3_F32(inst); + case Opcode::V_TRUNC_F32: + return V_TRUNC_F32(inst); + case Opcode::V_CEIL_F32: + return V_CEIL_F32(inst); + case Opcode::V_MUL_LEGACY_F32: + return V_MUL_F32(inst); + case Opcode::V_MAC_LEGACY_F32: + return V_MAC_F32(inst); + case Opcode::V_MAD_LEGACY_F32: + return V_MAD_F32(inst); + case Opcode::V_MAX_LEGACY_F32: + return V_MAX_F32(inst, true); + case Opcode::V_RSQ_LEGACY_F32: + case Opcode::V_RSQ_CLAMP_F32: + return V_RSQ_F32(inst); + case Opcode::V_RCP_IFLAG_F32: + return V_RCP_F32(inst); + + case Opcode::V_CMPX_F_F32: + return V_CMP_F32(ConditionOp::F, true, inst); + case Opcode::V_CMPX_LT_F32: + return V_CMP_F32(ConditionOp::LT, true, inst); + case Opcode::V_CMPX_EQ_F32: + return V_CMP_F32(ConditionOp::EQ, true, inst); + case Opcode::V_CMPX_LE_F32: + return V_CMP_F32(ConditionOp::LE, true, inst); + case Opcode::V_CMPX_GT_F32: + return V_CMP_F32(ConditionOp::GT, true, inst); + case Opcode::V_CMPX_LG_F32: + return V_CMP_F32(ConditionOp::LG, true, inst); + case Opcode::V_CMPX_GE_F32: + return V_CMP_F32(ConditionOp::GE, true, inst); + case Opcode::V_CMPX_NGE_F32: + return V_CMP_F32(ConditionOp::LT, true, inst); + case Opcode::V_CMPX_NLG_F32: + return V_CMP_F32(ConditionOp::EQ, true, inst); + case Opcode::V_CMPX_NGT_F32: + return V_CMP_F32(ConditionOp::LE, true, inst); + case Opcode::V_CMPX_NLE_F32: + return V_CMP_F32(ConditionOp::GT, true, inst); + case Opcode::V_CMPX_NEQ_F32: + return V_CMP_F32(ConditionOp::LG, true, inst); + case Opcode::V_CMPX_NLT_F32: + return V_CMP_F32(ConditionOp::GE, true, inst); + case Opcode::V_CMPX_TRU_F32: + return V_CMP_F32(ConditionOp::TRU, true, inst); + case Opcode::V_CMP_CLASS_F32: + return V_CMP_CLASS_F32(inst); + + case Opcode::V_CMP_LE_U32: + return V_CMP_U32(ConditionOp::LE, false, false, inst); + case Opcode::V_CMP_GT_I32: + return V_CMP_U32(ConditionOp::GT, true, false, inst); + case Opcode::V_CMP_LT_I32: + return V_CMP_U32(ConditionOp::LT, true, false, inst); + case Opcode::V_CMPX_LT_I32: + return V_CMP_U32(ConditionOp::LT, true, true, inst); + case Opcode::V_CMPX_F_U32: + return V_CMP_U32(ConditionOp::F, false, true, inst); + case Opcode::V_CMPX_LT_U32: + return V_CMP_U32(ConditionOp::LT, false, true, inst); + case Opcode::V_CMPX_EQ_U32: + return V_CMP_U32(ConditionOp::EQ, false, true, inst); + case Opcode::V_CMPX_LE_U32: + return V_CMP_U32(ConditionOp::LE, false, true, inst); + case Opcode::V_CMPX_GT_U32: + return V_CMP_U32(ConditionOp::GT, false, true, inst); + case Opcode::V_CMPX_NE_U32: + return V_CMP_U32(ConditionOp::LG, false, true, inst); + case Opcode::V_CMPX_GE_U32: + return V_CMP_U32(ConditionOp::GE, false, true, inst); + case Opcode::V_CMPX_TRU_U32: + return V_CMP_U32(ConditionOp::TRU, false, true, inst); + default: + info.translation_failed = true; + } +} + void Translator::V_MOV(const GcnInst& inst) { SetDst(inst.dst[0], GetSrc(inst.src[0])); } diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 55a2d624e..6fdf94e24 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -12,4 +12,15 @@ void Translator::V_INTERP_P2_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } +void Translator::EmitVectorInterpolation(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::V_INTERP_P1_F32: + return; + case Opcode::V_INTERP_P2_F32: + return V_INTERP_P2_F32(inst); + default: + info.translation_failed = true; + } +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index f4383c61d..bc6deab08 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -5,6 +5,93 @@ namespace Shader::Gcn { +void Translator::EmitVectorMemory(const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::IMAGE_SAMPLE_LZ_O: + case Opcode::IMAGE_SAMPLE_O: + case Opcode::IMAGE_SAMPLE_C: + case Opcode::IMAGE_SAMPLE_C_LZ: + case Opcode::IMAGE_SAMPLE_LZ: + case Opcode::IMAGE_SAMPLE: + case Opcode::IMAGE_SAMPLE_L: + case Opcode::IMAGE_SAMPLE_C_O: + case Opcode::IMAGE_SAMPLE_B: + case Opcode::IMAGE_SAMPLE_C_LZ_O: + return IMAGE_SAMPLE(inst); + case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_LZ_O: + return IMAGE_GATHER(inst); + case Opcode::IMAGE_ATOMIC_ADD: + return IMAGE_ATOMIC(AtomicOp::Add, inst); + case Opcode::IMAGE_ATOMIC_AND: + return IMAGE_ATOMIC(AtomicOp::And, inst); + case Opcode::IMAGE_ATOMIC_OR: + return IMAGE_ATOMIC(AtomicOp::Or, inst); + case Opcode::IMAGE_ATOMIC_XOR: + return IMAGE_ATOMIC(AtomicOp::Xor, inst); + case Opcode::IMAGE_ATOMIC_UMAX: + return IMAGE_ATOMIC(AtomicOp::Umax, inst); + case Opcode::IMAGE_ATOMIC_SMAX: + return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_UMIN: + return IMAGE_ATOMIC(AtomicOp::Umin, inst); + case Opcode::IMAGE_ATOMIC_SMIN: + return IMAGE_ATOMIC(AtomicOp::Smin, inst); + case Opcode::IMAGE_ATOMIC_INC: + return IMAGE_ATOMIC(AtomicOp::Inc, inst); + case Opcode::IMAGE_ATOMIC_DEC: + return IMAGE_ATOMIC(AtomicOp::Dec, inst); + case Opcode::IMAGE_GET_LOD: + return IMAGE_GET_LOD(inst); + case Opcode::IMAGE_STORE: + return IMAGE_STORE(inst); + case Opcode::IMAGE_LOAD_MIP: + return IMAGE_LOAD(true, inst); + case Opcode::IMAGE_LOAD: + return IMAGE_LOAD(false, inst); + case Opcode::IMAGE_GET_RESINFO: + return IMAGE_GET_RESINFO(inst); + + case Opcode::TBUFFER_LOAD_FORMAT_X: + return BUFFER_LOAD_FORMAT(1, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XY: + return BUFFER_LOAD_FORMAT(2, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XYZ: + return BUFFER_LOAD_FORMAT(3, true, true, inst); + case Opcode::TBUFFER_LOAD_FORMAT_XYZW: + return BUFFER_LOAD_FORMAT(4, true, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_X: + return BUFFER_LOAD_FORMAT(1, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XY: + return BUFFER_LOAD_FORMAT(2, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XYZ: + return BUFFER_LOAD_FORMAT(3, false, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_XYZW: + return BUFFER_LOAD_FORMAT(4, false, true, inst); + case Opcode::BUFFER_LOAD_DWORD: + return BUFFER_LOAD_FORMAT(1, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX2: + return BUFFER_LOAD_FORMAT(2, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX3: + return BUFFER_LOAD_FORMAT(3, false, false, inst); + case Opcode::BUFFER_LOAD_DWORDX4: + return BUFFER_LOAD_FORMAT(4, false, false, inst); + case Opcode::BUFFER_STORE_FORMAT_X: + case Opcode::BUFFER_STORE_DWORD: + return BUFFER_STORE_FORMAT(1, false, inst); + case Opcode::BUFFER_STORE_DWORDX2: + return BUFFER_STORE_FORMAT(2, false, inst); + case Opcode::BUFFER_STORE_DWORDX3: + return BUFFER_STORE_FORMAT(3, false, inst); + case Opcode::BUFFER_STORE_FORMAT_XYZW: + case Opcode::BUFFER_STORE_DWORDX4: + return BUFFER_STORE_FORMAT(4, false, inst); + default: + info.translation_failed = true; + } +} + void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { IR::VectorReg dst_reg{inst.dst[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code};