From 43892c8679487a6e042e206b5ff4815389715200 Mon Sep 17 00:00:00 2001 From: martin Date: Wed, 4 Sep 2024 15:57:52 -0500 Subject: [PATCH] fix packing pack is for floats, we have uints here --- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_alu.cpp | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 77e30485b..ca78c1f5e 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -233,7 +233,7 @@ private: [[nodiscard]] T GetSrc64(const InstOperand& operand); void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); - IR::U16 Convert_F32_to_U16_Normalized(const IR::F32& src); + IR::U32 Convert_F32_to_U16_Normalized(const IR::F32& src); void LogMissingOpcode(const GcnInst& inst); private: diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 06fa4c37d..a32c6248c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -344,25 +344,26 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } -IR::U16 Translator::Convert_F32_to_U16_Normalized(const IR::F32& src) { - const IR::F32 as_float = ir.FPMul(src, ir.Imm32((f32)std::numeric_limits::max())); +IR::U32 Translator::Convert_F32_to_U16_Normalized(const IR::F32& src) { + const IR::F32 clamped = ir.FPClamp(src, ir.Imm32(0.0f), ir.Imm32(1.0f)); + const IR::F32 as_float = ir.FPMul(clamped, ir.Imm32((f32)std::numeric_limits::max())); const IR::U32 as_unsigned = ir.ConvertFToU(32, as_float); - return ir.UConvert(16, as_unsigned); + return as_unsigned; } void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::F32 src0 = GetSrc(inst.src[0]); - const IR::F32 src1 = GetSrc(inst.src[1]); + const IR::U32 src0 = Convert_F32_to_U16_Normalized(GetSrc(inst.src[0])); + const IR::U32 src1 = Convert_F32_to_U16_Normalized(GetSrc(inst.src[1])); - const IR::Value vec_u16 = - ir.CompositeConstruct( - Convert_F32_to_U16_Normalized(src0), - Convert_F32_to_U16_Normalized(src1) - ); - - ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_u16)); + ir.SetVectorReg( + dst_reg, + ir.BitwiseOr( + src0, + ir.ShiftLeftLogical(src1, ir.Imm32(16)) + ) + ); } void Translator::V_CVT_F32_F16(const GcnInst& inst) {