Floating point and fixes

This commit is contained in:
Lander Gallastegi 2025-04-06 01:25:21 +02:00 committed by Lander Gallastegi
parent 3d971701db
commit 258a0225c2
10 changed files with 1026 additions and 338 deletions

View File

@ -905,6 +905,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp

View File

@ -16,32 +16,24 @@ void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& s
}
void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
if (dest[0].isMEM() && src[0].isMEM()) {
Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
ctx.Code().mov(tmp, src[0]);
ctx.Code().mov(dest[0], tmp);
} else if (src[0].isMEM()) {
ctx.Code().mov(dest[0], src[0]);
if (src[0].isMEM()) {
MovGP(ctx, dest[0], src[0]);
} else if (dest[0].isMEM()) {
ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128());
} else {
ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128());
ctx.Code().mov(dest[0], dword[rsp - 4]);
MovGP(ctx, dest[0], dword[rsp - 4]);
}
}
void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
if (dest[0].isMEM() && src[0].isMEM()) {
Reg tmp = ctx.TempGPReg(false);
ctx.Code().mov(tmp, src[0]);
ctx.Code().mov(dest[0], tmp);
} else if (src[0].isMEM()) {
ctx.Code().mov(dest[0], src[0]);
if (src[0].isMEM()) {
MovGP(ctx, dest[0], src[0]);
} else if (dest[0].isMEM()) {
ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128());
} else {
ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128());
ctx.Code().mov(dest[0], qword[rsp - 8]);
MovGP(ctx, dest[0], qword[rsp - 8]);
}
}
@ -50,31 +42,23 @@ void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& s
}
void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
if (dest[0].isMEM() && src[0].isMEM()) {
Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
ctx.Code().mov(tmp, src[0]);
ctx.Code().mov(dest[0], tmp);
} else if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], src[0]);
if (dest[0].isMEM()) {
MovGP(ctx, dest[0], src[0]);
} else if (src[0].isMEM()) {
ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress());
} else {
ctx.Code().mov(dword[rsp - 4], src[0]);
MovGP(ctx, dword[rsp - 4], src[0]);
ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]);
}
}
void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
if (dest[0].isMEM() && src[0].isMEM()) {
Reg tmp = ctx.TempGPReg(false);
ctx.Code().mov(tmp, src[0]);
ctx.Code().mov(dest[0], tmp);
} else if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], src[0]);
if (dest[0].isMEM()) {
MovGP(ctx, dest[0], src[0]);
} else if (src[0].isMEM()) {
ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress());
} else {
ctx.Code().mov(qword[rsp - 8], src[0].getReg());
MovGP(ctx, qword[rsp - 8], src[0]);
ctx.Code().mov(dest[0].getReg().cvt128(), qword[rsp - 8]);
}
}
@ -82,35 +66,27 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM());
Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg();
ctx.Code().mov(tmp, src[0]);
MovGP(ctx, tmp, src[1]);
ctx.Code().shl(tmp, 32);
ctx.Code().or_(tmp, src[0]);
if (is_mem) {
ctx.Code().mov(dest[0], tmp);
}
MovGP(ctx, dest[0], tmp);
}
void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
if (src[0].isMEM()) {
ctx.Code().mov(src0, src[0]);
}
MovGP(ctx, src0, src[0]);
Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64);
ctx.Code().mov(dest1, src0);
MovGP(ctx, dest1, src0);
ctx.Code().shr(dest1, 32);
if (dest[1].isMEM()) {
ctx.Code().mov(dest[1], dest1.cvt32());
}
ctx.Code().mov(dest[0], src0.cvt32());
MovGP(ctx, dest[1], dest1);
MovGP(ctx, dest[0], src0.cvt32());
}
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
MovFloat(ctx, tmp, src[0]);
ctx.Code().pinsrd(tmp, src[1], 1);
if (dest[0].isMEM()) {
ctx.Code().movss(dest[0].getAddress(), tmp);
}
MovFloat(ctx, dest[0], tmp);
}
void EmitPackUnorm2x16(EmitContext& ctx) {

View File

@ -19,7 +19,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg)
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
Reg& tmp = ctx.TempGPReg();
ctx.Code().mov(tmp, offset[0]);
MovGP(ctx, tmp, offset[0]);
ctx.Code().shl(tmp, 2);
ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
MovGP(ctx, ptr[tmp], value[0]);
@ -59,7 +59,7 @@ void EmitGetGotoVariable(EmitContext&) {
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
Reg& tmp = ctx.TempGPReg(false);
ctx.Code().mov(tmp, base[1]);
MovGP(ctx, tmp, base[1]);
ctx.Code().shl(tmp, 32);
ctx.Code().or_(tmp, base[0]);
if (offset[0].isMEM()) {

View File

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
@ -10,169 +9,27 @@ namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
namespace {
// Emits inline integer code that widens the IEEE half in `src` to a single-precision value.
// The float bit pattern is assembled in a GP scratch register and then stored to `dest`
// (plain mov for a memory destination, movd for an XMM destination).
// Uses three reserved GP scratch registers from the context.
static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();
    c.movzx(mantissa, src);

    // Extract sign (moved to bit 31), exponent (5 bits) and mantissa (10 bits)
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);

    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);

    // Normalize subnormal number: shift until the implicit bit (0x400) appears,
    // lowering the exponent once per shift
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);

    // Zero: the result is just the sign bit. The mantissa register is 0 here, so the
    // previous `and_(mantissa, sign)` always produced +0.0 and lost the sign of -0.0.
    c.L(zero_mantissa);
    c.mov(mantissa, sign);
    c.jmp(done);

    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);

    // Infinite or NaN: all-ones float exponent, payload moved to the top of the fraction
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);

    // Normal number: rebias the exponent (127 - 15 = 112) and widen the fraction
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }
}
// Emits inline integer code that narrows the single-precision value in `src` to an IEEE half
// stored in `dest`. The fraction is truncated (no rounding). Inputs whose half exponent would
// be <= 0, including float subnormals, become a signed zero. Too-large inputs become a signed
// infinity, and NaN stays NaN.
static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label signed_zero, overflow, infinity, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    // Build the result directly in the destination register when there is one
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }

    // Extract sign (moved to bit 15), exponent and the 23-bit fraction.
    // The fraction is left unshifted here: the earlier extra `shl(mantissa, 3)` made the
    // later `shr 13` keep 13 bits, which spilled into the half exponent field.
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);

    // Zero and subnormal floats flush to a signed zero
    c.test(exponent, exponent);
    c.jz(signed_zero);

    // Rebias (127 - 15 = 112) and check for overflow and underflow
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(signed_zero);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);

    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Overflow: infinity, unless the input was a NaN (float exponent 0xFF, non-zero fraction)
    c.L(overflow);
    c.cmp(exponent, 0xFF - 112);
    c.jne(infinity);
    c.test(mantissa, mantissa);
    c.jz(infinity);
    c.mov(mantissa, 0x7E00); // quiet NaN
    c.or_(mantissa, sign);
    c.jmp(done);
    c.L(infinity);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Underflow / zero: keep only the sign so -0.0 and tiny negatives stay negative
    c.L(signed_zero);
    c.mov(mantissa, sign);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }
}
}
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp_xmm = ctx.TempXmmReg(false);
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
ctx.Code().and_(tmp_reg, 0xFFFF);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_reg.cvt16());
}
MovGP(ctx, dest[0], tmp_reg);
}
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
ctx.Code().cvttss2si(tmp, src[0]);
ctx.Code().and_(tmp, 0xFFFF);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp.cvt16());
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
ctx.Code().cvttsd2si(tmp, src[0]);
ctx.Code().and_(tmp, 0xFFFF);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp.cvt16());
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -180,25 +37,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_reg);
}
MovGP(ctx, dest[0], tmp_reg);
}
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
ctx.Code().cvttss2si(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
ctx.Code().cvttsd2si(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -206,25 +57,19 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg();
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_reg);
}
MovGP(ctx, dest[0], tmp_reg);
}
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
ctx.Code().cvttss2si(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
ctx.Code().cvttsd2si(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovGP(ctx, dest[0], tmp);
}
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -282,17 +127,13 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtsd2ss(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovFloat(ctx, dest[0], tmp);
}
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtss2sd(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovDouble(ctx, dest[0], tmp);
}
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -344,9 +185,7 @@ void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& sr
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().movsx(tmp_reg, src[0]);
ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_xmm);
}
MovFloat(ctx, dest[0], tmp_xmm);
}
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -354,25 +193,19 @@ void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& s
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().movsx(tmp_reg, src[0]);
ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_xmm);
}
MovFloat(ctx, dest[0], tmp_xmm);
}
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtsi2ss(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovFloat(ctx, dest[0], tmp);
}
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtsi2ss(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovFloat(ctx, dest[0], tmp);
}
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -396,9 +229,7 @@ void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& sr
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().movsx(tmp_reg, src[0]);
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_xmm);
}
MovDouble(ctx, dest[0], tmp_xmm);
}
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
@ -406,25 +237,19 @@ void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& s
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().movsx(tmp_reg, src[0]);
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp_xmm);
}
MovDouble(ctx, dest[0], tmp_xmm);
}
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtsi2sd(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovDouble(ctx, dest[0], tmp);
}
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
ctx.Code().cvtsi2sd(tmp, src[0]);
if (dest[0].isMEM()) {
ctx.Code().mov(dest[0], tmp);
}
MovDouble(ctx, dest[0], tmp);
}
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {

View File

@ -0,0 +1,723 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
// Absolute value of a half: copies src[0] into a 16-bit GP register and clears bit 15.
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    const auto& out = dest[0];
    // Operate in place when the destination is a register; otherwise go through a scratch GP.
    Reg bits = out.isMEM() ? ctx.TempGPReg(false).cvt16() : out.getReg().cvt16();
    MovGP(ctx, bits, src[0]);
    auto& code = ctx.Code();
    code.and_(bits, 0x7FFF);
    MovGP(ctx, out, bits);
}
// Absolute value of a 32-bit float: dest[0] = src[0] with the sign bit cleared.
//
// The 0x7FFFFFFF mask is built in a general-purpose scratch register (the previous code took
// it from TempXmmReg, which cannot be the target of an immediate mov) and moved to an XMM.
// The source is loaded into the working register first, so that:
//  - andps only ever sees register operands (its memory form reads a full, 16-byte aligned
//    128-bit operand, which a 32-bit value in memory does not guarantee), and
//  - a destination register that is also the source is read before it is overwritten.
// NOTE(review): relies on TempXmmReg() (default argument) handing out distinct scratch
// registers on successive calls, as the 16-bit emitters in this file already do.
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm value = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm mask = ctx.TempXmmReg();
    Reg32 mask_bits = ctx.TempGPReg(false).cvt32();
    MovFloat(ctx, value, src[0]);
    ctx.Code().mov(mask_bits, 0x7FFFFFFF);
    ctx.Code().movd(mask, mask_bits);
    ctx.Code().andps(value, mask);
    MovFloat(ctx, dest[0], value);
}
// Absolute value of a 64-bit float: dest[0] = src[0] with the sign bit cleared.
//
// The result is written back with MovDouble; the previous MovFloat matched the 32-bit
// emitters and did not fit a 64-bit result. The source is loaded into the working register
// before masking so andpd only uses register operands (its memory form reads an aligned
// 128-bit operand) and a destination that aliases the source is read before being written.
// NOTE(review): relies on TempXmmReg() (default argument) handing out distinct scratch
// registers on successive calls, as the 16-bit emitters in this file already do.
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm value = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm mask = ctx.TempXmmReg();
    Reg64 mask_bits = ctx.TempGPReg(false).cvt64();
    MovDouble(ctx, value, src[0]);
    ctx.Code().mov(mask_bits, 0x7FFFFFFFFFFFFFFF);
    ctx.Code().movq(mask, mask_bits);
    ctx.Code().andpd(value, mask);
    MovDouble(ctx, dest[0], value);
}
// Half-precision add: both inputs are widened to f32 in scratch XMM registers, added with
// addss, and the sum is narrowed back into dest[0].
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm lhs = ctx.TempXmmReg();
    Xmm rhs = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs, op1[0]);
    EmitInlineF16ToF32(ctx, rhs, op2[0]);
    auto& code = ctx.Code();
    code.addss(lhs, rhs);
    EmitInlineF32ToF16(ctx, dest[0], lhs);
}
// Scalar single-precision add: dest[0] = op1[0] + op2[0].
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Accumulate in the destination register when there is one; a memory destination
    // goes through a scratch XMM and is written back at the end.
    Xmm acc = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().addss(acc, op2[0]);
    MovFloat(ctx, out, acc);
}
// Scalar double-precision add: dest[0] = op1[0] + op2[0].
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Use the destination register as accumulator unless the destination lives in memory.
    Xmm acc = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, acc, op1[0]);
    ctx.Code().addsd(acc, op2[0]);
    MovDouble(ctx, out, acc);
}
// Scalar single-precision subtract: dest[0] = op1[0] - op2[0].
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Minuend is loaded into the destination register (or a scratch XMM for memory dests).
    Xmm acc = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovFloat(ctx, acc, op1[0]);
    ctx.Code().subss(acc, op2[0]);
    MovFloat(ctx, out, acc);
}
// Half-precision fused multiply-add: dest[0] = op1[0] * op2[0] + op3[0].
// All three inputs are widened to f32 in scratch XMM registers, combined with one FMA and the
// result is narrowed back to a half.
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    Xmm tmp3 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, op1[0]);
    EmitInlineF16ToF32(ctx, tmp2, op2[0]);
    EmitInlineF16ToF32(ctx, tmp3, op3[0]);
    // The 231 form computes tmp3 = tmp1 * tmp2 + tmp3. The previous 132 form computed
    // tmp3 * tmp2 + tmp1, i.e. it used op3 as a factor and op1 as the addend.
    ctx.Code().vfmadd231ss(tmp3, tmp1, tmp2);
    EmitInlineF32ToF16(ctx, dest[0], tmp3);
}
// Single-precision fused multiply-add: dest[0] = op1[0] * op2[0] + op3[0].
//
// The accumulator is the destination register (or a scratch XMM for a memory destination),
// so the only register written is the one that receives the result. The previous version
// accumulated into op2's own register whenever op2 was not in memory, overwriting that source
// operand, and it loaded op3 into the destination before op1/op2 had been read.
// NOTE(review): like the other binary emitters in this file, this still assumes a register
// destination does not alias op2 or op3 - confirm against the register allocator.
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm acc = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm factor = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
    MovFloat(ctx, acc, op1[0]);
    MovFloat(ctx, factor, op2[0]);
    // 213 form: acc = factor * acc + op3; the addend may be a register or a 32-bit memory operand.
    ctx.Code().vfmadd213ss(acc, factor, op3[0]);
    MovFloat(ctx, dest[0], acc);
}
// Double-precision fused multiply-add: dest[0] = op1[0] * op2[0] + op3[0].
//
// The accumulator is the destination register (or a scratch XMM for a memory destination),
// so the only register written is the one that receives the result. The previous version
// accumulated into op2's own register whenever op2 was not in memory, overwriting that source
// operand, and it loaded op3 into the destination before op1/op2 had been read.
// NOTE(review): like the other binary emitters in this file, this still assumes a register
// destination does not alias op2 or op3 - confirm against the register allocator.
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
    Xmm acc = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm factor = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
    MovDouble(ctx, acc, op1[0]);
    MovDouble(ctx, factor, op2[0]);
    // 213 form: acc = factor * acc + op3; the addend may be a register or a 64-bit memory operand.
    ctx.Code().vfmadd213sd(acc, factor, op3[0]);
    MovDouble(ctx, dest[0], acc);
}
// Single-precision maximum: dest[0] = max(op1[0], op2[0]).
//
// Non-legacy mode is a plain maxss, which yields its second source (op2) whenever either
// input is NaN.
//
// Legacy mode additionally returns op1 when only op2 is NaN, so a NaN is produced only when
// both inputs are NaN. The previous legacy sequence masked on "op1 is NaN" and OR-ed op2 in,
// but maxss already returns op2 in that case, so it behaved exactly like the non-legacy path
// and still returned NaN for a NaN op2. It also fed op2 straight into andps, whose memory
// form reads an aligned 128-bit operand.
// NOTE(review): "return the non-NaN operand" is the presumed meaning of is_legacy - confirm
// against the other backends.
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        Xmm lhs = ctx.TempXmmReg();
        Xmm result = ctx.TempXmmReg();
        Xmm rhs = ctx.TempXmmReg();
        MovFloat(ctx, lhs, op1[0]);
        MovFloat(ctx, result, op1[0]);
        MovFloat(ctx, rhs, op2[0]);
        ctx.Code().maxss(result, rhs);   // max(op1, op2), or op2 if either is NaN
        ctx.Code().cmpunordss(rhs, rhs); // rhs = all-ones mask when op2 is NaN
        ctx.Code().andps(lhs, rhs);      // lhs = op1 when op2 is NaN, else 0
        ctx.Code().andnps(rhs, result);  // rhs = maxss result when op2 is not NaN, else 0
        ctx.Code().orps(rhs, lhs);
        MovFloat(ctx, dest[0], rhs);
    } else {
        Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().maxss(tmp, op2[0]);
        MovFloat(ctx, dest[0], tmp);
    }
}
// Scalar double-precision maximum via maxsd: dest[0] = max(op1[0], op2[0]).
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Compare in the destination register, or in a scratch XMM when dest is in memory.
    Xmm best = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, best, op1[0]);
    ctx.Code().maxsd(best, op2[0]);
    MovDouble(ctx, out, best);
}
// Single-precision minimum: dest[0] = min(op1[0], op2[0]).
//
// Non-legacy mode is a plain minss, which yields its second source (op2) whenever either
// input is NaN.
//
// Legacy mode additionally returns op1 when only op2 is NaN, so a NaN is produced only when
// both inputs are NaN. The previous legacy sequence masked on "op1 is NaN" and OR-ed op2 in,
// but minss already returns op2 in that case, so it behaved exactly like the non-legacy path
// and still returned NaN for a NaN op2. It also fed op2 straight into andps, whose memory
// form reads an aligned 128-bit operand.
// NOTE(review): "return the non-NaN operand" is the presumed meaning of is_legacy - confirm
// against the other backends.
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
    if (is_legacy) {
        Xmm lhs = ctx.TempXmmReg();
        Xmm result = ctx.TempXmmReg();
        Xmm rhs = ctx.TempXmmReg();
        MovFloat(ctx, lhs, op1[0]);
        MovFloat(ctx, result, op1[0]);
        MovFloat(ctx, rhs, op2[0]);
        ctx.Code().minss(result, rhs);   // min(op1, op2), or op2 if either is NaN
        ctx.Code().cmpunordss(rhs, rhs); // rhs = all-ones mask when op2 is NaN
        ctx.Code().andps(lhs, rhs);      // lhs = op1 when op2 is NaN, else 0
        ctx.Code().andnps(rhs, result);  // rhs = minss result when op2 is not NaN, else 0
        ctx.Code().orps(rhs, lhs);
        MovFloat(ctx, dest[0], rhs);
    } else {
        Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
        MovFloat(ctx, tmp, op1[0]);
        ctx.Code().minss(tmp, op2[0]);
        MovFloat(ctx, dest[0], tmp);
    }
}
// Scalar double-precision minimum via minsd: dest[0] = min(op1[0], op2[0]).
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Compare in the destination register, or in a scratch XMM when dest is in memory.
    Xmm best = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, best, op1[0]);
    ctx.Code().minsd(best, op2[0]);
    MovDouble(ctx, out, best);
}
// Half-precision multiply: both inputs are widened to f32 in scratch XMM registers,
// multiplied with mulss, and the product is narrowed back into dest[0].
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    Xmm lhs = ctx.TempXmmReg();
    Xmm rhs = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs, op1[0]);
    EmitInlineF16ToF32(ctx, rhs, op2[0]);
    auto& code = ctx.Code();
    code.mulss(lhs, rhs);
    EmitInlineF32ToF16(ctx, dest[0], lhs);
}
// Scalar single-precision multiply: dest[0] = op1[0] * op2[0].
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Multiply in the destination register, or in a scratch XMM when dest is in memory.
    Xmm product = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovFloat(ctx, product, op1[0]);
    ctx.Code().mulss(product, op2[0]);
    MovFloat(ctx, out, product);
}
// Scalar double-precision multiply: dest[0] = op1[0] * op2[0].
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // Multiply in the destination register, or in a scratch XMM when dest is in memory.
    Xmm product = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, product, op1[0]);
    ctx.Code().mulsd(product, op2[0]);
    MovDouble(ctx, out, product);
}
// Scalar single-precision divide: dest[0] = op1[0] / op2[0].
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // The dividend is loaded into the destination register (scratch XMM for memory dests).
    Xmm quotient = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovFloat(ctx, quotient, op1[0]);
    ctx.Code().divss(quotient, op2[0]);
    MovFloat(ctx, out, quotient);
}
// Scalar double-precision divide: dest[0] = op1[0] / op2[0].
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
    const auto& out = dest[0];
    // The dividend is loaded into the destination register (scratch XMM for memory dests).
    Xmm quotient = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, quotient, op1[0]);
    ctx.Code().divsd(quotient, op2[0]);
    MovDouble(ctx, out, quotient);
}
// Negates a half: copies op1[0] into a 16-bit GP register and flips bit 15.
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    // Operate in place when the destination is a register; otherwise go through a scratch GP.
    Reg bits = out.isMEM() ? ctx.TempGPReg(false).cvt16() : out.getReg().cvt16();
    MovGP(ctx, bits, op1[0]);
    auto& code = ctx.Code();
    code.xor_(bits, 0x8000);
    MovGP(ctx, out, bits);
}
// Negates a 32-bit float: dest[0] = op1[0] with the sign bit flipped.
//
// The operand is loaded into the working register first and the 0x80000000 mask is kept in a
// separate scratch XMM. That way xorps only sees register operands (its memory form reads a
// full, 16-byte aligned 128-bit operand, which a 32-bit value in memory does not guarantee),
// and a destination register that is also the source is read before it is overwritten.
// NOTE(review): relies on TempXmmReg() (default argument) handing out distinct scratch
// registers on successive calls, as the 16-bit emitters in this file already do.
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm value = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm sign_mask = ctx.TempXmmReg();
    Reg32 sign_bits = ctx.TempGPReg(false).cvt32();
    MovFloat(ctx, value, op1[0]);
    ctx.Code().mov(sign_bits, 0x80000000);
    ctx.Code().movd(sign_mask, sign_bits);
    ctx.Code().xorps(value, sign_mask);
    MovFloat(ctx, dest[0], value);
}
// Negates a 64-bit float: dest[0] = op1[0] with the sign bit flipped.
//
// The sign mask is built in a general-purpose scratch register; the previous code took that
// register from TempXmmReg, which cannot be the target of an immediate mov nor the GP source
// of movq. The operand is loaded into the working register before the xor so xorpd only sees
// register operands (its memory form reads an aligned 128-bit operand) and a destination
// that aliases the source is read before being written.
// NOTE(review): relies on TempXmmReg() (default argument) handing out distinct scratch
// registers on successive calls, as the 16-bit emitters in this file already do.
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm value = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
    Xmm sign_mask = ctx.TempXmmReg();
    Reg64 sign_bits = ctx.TempGPReg(false).cvt64();
    MovDouble(ctx, value, op1[0]);
    ctx.Code().mov(sign_bits, 0x8000000000000000);
    ctx.Code().movq(sign_mask, sign_bits);
    ctx.Code().xorpd(value, sign_mask);
    MovDouble(ctx, dest[0], value);
}
// Placeholder: FPSin is not supported by this backend yet; always throws NotImplementedException.
void EmitFPSin(EmitContext& ctx) {
throw NotImplementedException("FPSin");
}
// Placeholder: FPCos is not supported by this backend yet; always throws NotImplementedException.
void EmitFPCos(EmitContext& ctx) {
throw NotImplementedException("FPCos");
}
// Placeholder: FPExp2 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPExp2(EmitContext& ctx) {
throw NotImplementedException("FPExp2");
}
// Placeholder: FPLdexp is not supported by this backend yet; always throws NotImplementedException.
void EmitFPLdexp(EmitContext& ctx) {
throw NotImplementedException("FPLdexp");
}
// Placeholder: FPLog2 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPLog2(EmitContext& ctx) {
throw NotImplementedException("FPLog2");
}
// Single-precision reciprocal of op1[0] using rcpss, which produces an approximation
// rather than an exactly rounded 1/x.
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm recip = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().rcpss(recip, op1[0]);
    MovFloat(ctx, out, recip);
}
// Double-precision reciprocal: materializes 1.0 (integer 1 converted with cvtsi2sd) and
// divides it by op1[0].
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm quotient = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    Reg one = ctx.TempGPReg(false);
    auto& code = ctx.Code();
    code.mov(one, 1);
    code.cvtsi2sd(quotient, one);
    code.divsd(quotient, op1[0]);
    MovDouble(ctx, out, quotient);
}
// Single-precision reciprocal square root of op1[0] using rsqrtss, which produces an
// approximation rather than an exactly rounded result.
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rsqrt = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().rsqrtss(rsqrt, op1[0]);
    MovFloat(ctx, out, rsqrt);
}
// Double-precision reciprocal square root, computed as sqrt(1.0 / op1[0]): 1.0 is
// materialized from integer 1, divided by the operand, and the quotient is square-rooted.
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm acc = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    Reg one = ctx.TempGPReg(false);
    auto& code = ctx.Code();
    code.mov(one, 1);
    code.cvtsi2sd(acc, one);
    code.divsd(acc, op1[0]);
    code.sqrtsd(acc, acc);
    MovDouble(ctx, out, acc);
}
// Single-precision square root of op1[0] via sqrtss.
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm root = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().sqrtss(root, op1[0]);
    MovFloat(ctx, out, root);
}
// Placeholder: FPSaturate16 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPSaturate16(EmitContext& ctx) {
throw NotImplementedException("FPSaturate16");
}
// Placeholder: FPSaturate32 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPSaturate32(EmitContext& ctx) {
throw NotImplementedException("FPSaturate32");
}
// Placeholder: FPSaturate64 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPSaturate64(EmitContext& ctx) {
throw NotImplementedException("FPSaturate64");
}
// Half-precision clamp: value, lower bound and upper bound are widened to f32, the value is
// raised to the lower bound with maxss and then capped at the upper bound with minss, and
// the result is narrowed back into dest[0].
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    Xmm value = ctx.TempXmmReg();
    Xmm lower = ctx.TempXmmReg();
    Xmm upper = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, value, op[0]);
    EmitInlineF16ToF32(ctx, lower, min[0]);
    EmitInlineF16ToF32(ctx, upper, max[0]);
    auto& code = ctx.Code();
    code.maxss(value, lower);
    code.minss(value, upper);
    EmitInlineF32ToF16(ctx, dest[0], value);
}
// Single-precision clamp: dest[0] = minss(maxss(op[0], min[0]), max[0]).
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    const auto& out = dest[0];
    Xmm value = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovFloat(ctx, value, op[0]);
    auto& code = ctx.Code();
    code.maxss(value, min[0]); // raise to the lower bound
    code.minss(value, max[0]); // cap at the upper bound
    MovFloat(ctx, out, value);
}
// Double-precision clamp: dest[0] = minsd(maxsd(op[0], min[0]), max[0]).
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
    const auto& out = dest[0];
    Xmm value = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    MovDouble(ctx, value, op[0]);
    auto& code = ctx.Code();
    code.maxsd(value, min[0]); // raise to the lower bound
    code.minsd(value, max[0]); // cap at the upper bound
    MovDouble(ctx, out, value);
}
// Half-precision round-to-nearest-even: widen to f32, roundss with mode 0, narrow back.
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm value = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, value, op1[0]);
    auto& code = ctx.Code();
    code.roundss(value, value, 0x00);
    EmitInlineF32ToF16(ctx, dest[0], value);
}
// Single-precision round-to-nearest-even via roundss with rounding mode 0.
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundss(rounded, op1[0], 0x00);
    MovFloat(ctx, out, rounded);
}
// Double-precision round-to-nearest-even via roundsd with rounding mode 0.
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundsd(rounded, op1[0], 0x00);
    MovDouble(ctx, out, rounded);
}
// Half-precision floor: widen to f32, roundss with mode 1 (toward negative infinity), narrow back.
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm value = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, value, op1[0]);
    auto& code = ctx.Code();
    code.roundss(value, value, 0x01);
    EmitInlineF32ToF16(ctx, dest[0], value);
}
// Single-precision floor via roundss with rounding mode 1 (toward negative infinity).
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundss(rounded, op1[0], 0x01);
    MovFloat(ctx, out, rounded);
}
// Double-precision floor via roundsd with rounding mode 1 (toward negative infinity).
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundsd(rounded, op1[0], 0x01);
    MovDouble(ctx, out, rounded);
}
// Half-precision ceil: widen to f32, roundss with mode 2 (toward positive infinity), narrow back.
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    Xmm value = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, value, op1[0]);
    auto& code = ctx.Code();
    code.roundss(value, value, 0x02);
    EmitInlineF32ToF16(ctx, dest[0], value);
}
// Single-precision ceil via roundss with rounding mode 2 (toward positive infinity).
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundss(rounded, op1[0], 0x02);
    MovFloat(ctx, out, rounded);
}
// Double-precision ceil via roundsd with rounding mode 2 (toward positive infinity).
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
    const auto& out = dest[0];
    Xmm rounded = out.isMEM() ? ctx.TempXmmReg(false) : out.getReg().cvt128();
    ctx.Code().roundsd(rounded, op1[0], 0x02);
    MovDouble(ctx, out, rounded);
}
// Placeholder: FPTrunc16 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPTrunc16(EmitContext& ctx) {
throw NotImplementedException("FPTrunc16");
}
// Placeholder: FPTrunc32 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPTrunc32(EmitContext& ctx) {
throw NotImplementedException("FPTrunc32");
}
// Placeholder: FPTrunc64 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPTrunc64(EmitContext& ctx) {
throw NotImplementedException("FPTrunc64");
}
// Placeholder: FPFract32 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFract32(EmitContext& ctx) {
throw NotImplementedException("FPFract32");
}
// Placeholder: FPFract64 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFract64(EmitContext& ctx) {
throw NotImplementedException("FPFract64");
}
// Placeholder: FPFrexpSig32 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFrexpSig32(EmitContext& ctx) {
throw NotImplementedException("FPFrexpSig32");
}
// Placeholder: FPFrexpSig64 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFrexpSig64(EmitContext& ctx) {
throw NotImplementedException("FPFrexpSig64");
}
// Placeholder: FPFrexpExp32 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFrexpExp32(EmitContext& ctx) {
throw NotImplementedException("FPFrexpExp32");
}
// Placeholder: FPFrexpExp64 is not supported by this backend yet; always throws NotImplementedException.
void EmitFPFrexpExp64(EmitContext& ctx) {
throw NotImplementedException("FPFrexpExp64");
}
// Ordered equality for halves: emits the unordered-equal comparison, then forces dest[0]
// to 0 when the compare reported an unordered result (parity flag set).
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    EmitFPUnordEqual16(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    // The flags still come from the compare emitted above; PF clear means no NaN was involved.
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered equality for 32-bit floats: emits the unordered-equal comparison, then forces
// dest[0] to 0 when the compare reported an unordered result (parity flag set).
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    EmitFPUnordEqual32(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    // The flags still come from the compare emitted above; PF clear means no NaN was involved.
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered equality for 64-bit floats: emits the unordered-equal comparison, then forces
// dest[0] to 0 when the compare reported an unordered result (parity flag set).
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    EmitFPUnordEqual64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    // The flags still come from the compare emitted above; PF clear means no NaN was involved.
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Unordered equality for halves: both sides are widened to f32, compared with ucomiss, and
// dest[0] receives the zero flag (set for equal and for unordered operands).
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Xmm left = ctx.TempXmmReg();
    Xmm right = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, left, lhs[0]);
    EmitInlineF16ToF32(ctx, right, rhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(left, right);
    code.sete(dest[0]);
}
// Unordered equality for 32-bit floats: ucomiss followed by sete, so dest[0] is 1 for equal
// and for unordered operands.
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const auto& left = lhs[0];
    // ucomiss needs its first operand in a register; spill-resident values get a scratch XMM.
    Xmm cmp_reg = left.isMEM() ? ctx.TempXmmReg(false) : left.getReg().cvt128();
    MovFloat(ctx, cmp_reg, left);
    auto& code = ctx.Code();
    code.ucomiss(cmp_reg, rhs[0]);
    code.sete(dest[0]);
}
// Unordered equality for 64-bit floats: ucomisd followed by sete, so dest[0] is 1 for equal
// and for unordered operands.
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const auto& left = lhs[0];
    // ucomisd needs its first operand in a register; spill-resident values get a scratch XMM.
    Xmm cmp_reg = left.isMEM() ? ctx.TempXmmReg(false) : left.getReg().cvt128();
    MovDouble(ctx, cmp_reg, left);
    auto& code = ctx.Code();
    code.ucomisd(cmp_reg, rhs[0]);
    code.sete(dest[0]);
}
// Ordered inequality for halves: emits the unordered-not-equal comparison, then forces
// dest[0] to 0 when the compare reported an unordered result (parity flag set).
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    EmitFPUnordNotEqual16(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    // The flags still come from the compare emitted above; PF clear means no NaN was involved.
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered inequality for 32-bit floats: emits the unordered-not-equal comparison, then
// forces dest[0] to 0 when the compare reported an unordered result (parity flag set).
//
// The comparison call was missing here (unlike the 16- and 64-bit variants): the function
// only allocated an unused register and then branched on whatever flags a previous
// instruction had left behind, leaving dest[0] unwritten on the ordered path.
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordNotEqual32(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0], 0);
    ctx.Code().L(not_nan);
}
// Ordered inequality for 64-bit floats: emits the unordered-not-equal comparison, then
// forces dest[0] to 0 when the compare reported an unordered result (parity flag set).
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    // The flags still come from the compare emitted above; PF clear means no NaN was involved.
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Unordered inequality for halves: dest[0] = 1 when the operands differ or when either one
// is NaN. Both sides are widened to f32 and compared with ucomiss.
//
// ucomiss sets ZF for unordered operands as well as for equal ones, so setne alone reported
// 0 for NaN inputs; the parity flag (set only for unordered) is used to force the result to 1.
// mov does not modify flags, so a caller that inspects PF afterwards still sees the compare.
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label ordered;
    Xmm tmp1 = ctx.TempXmmReg();
    Xmm tmp2 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
    EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
    ctx.Code().ucomiss(tmp1, tmp2);
    ctx.Code().setne(dest[0]);
    ctx.Code().jnp(ordered);
    ctx.Code().mov(dest[0], 1);
    ctx.Code().L(ordered);
}
// f32 "not equal" compare via ucomiss; dest receives the setne result.
// A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomiss reports unordered as ZF=PF=CF=1, so setne yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovFloat(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_reg, rhs[0]);
    code.setne(dest[0]);
}
// f64 "not equal" compare via ucomisd; dest receives the setne result.
// A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomisd reports unordered as ZF=PF=CF=1, so setne yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovDouble(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomisd(lhs_reg, rhs[0]);
    code.setne(dest[0]);
}
// Ordered f16 "less than": emits the unordered variant, then overwrites dest
// with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThan16(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f32 "less than": emits the unordered variant, then overwrites dest
// with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThan32(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f64 "less than": emits the unordered variant, then overwrites dest
// with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThan64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// f16 "less than" compare: both operands are widened to f32 in scratch xmm
// registers, compared with ucomiss, and dest receives the setb result.
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_f32 = ctx.TempXmmReg();
    const Xmm rhs_f32 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs_f32, lhs[0]);
    EmitInlineF16ToF32(ctx, rhs_f32, rhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_f32, rhs_f32);
    code.setb(dest[0]);
}
// f32 "less than" compare via ucomiss; dest receives the setb result.
// A memory lhs is loaded into an unreserved scratch xmm register first;
// a register lhs is used in place.
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovFloat(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_reg, rhs[0]);
    code.setb(dest[0]);
}
// f64 "less than" compare via ucomisd; dest receives the setb result.
// A memory lhs is loaded into an unreserved scratch xmm register first;
// a register lhs is used in place.
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovDouble(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomisd(lhs_reg, rhs[0]);
    code.setb(dest[0]);
}
// Ordered f16 "greater than": emits the unordered variant, then overwrites
// dest with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f32 "greater than": emits the unordered variant, then overwrites
// dest with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f64 "greater than": emits the unordered variant, then overwrites
// dest with 0 when the parity flag left by the compare is set (NaN operand).
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// f16 "greater than" compare: both operands are widened to f32 in scratch xmm
// registers, compared with ucomiss, and dest receives the seta result.
// NOTE(review): ucomiss reports unordered as ZF=PF=CF=1, so seta yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_f32 = ctx.TempXmmReg();
    const Xmm rhs_f32 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs_f32, lhs[0]);
    EmitInlineF16ToF32(ctx, rhs_f32, rhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_f32, rhs_f32);
    code.seta(dest[0]);
}
// f32 "greater than" compare via ucomiss; dest receives the seta result.
// A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomiss reports unordered as ZF=PF=CF=1, so seta yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovFloat(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_reg, rhs[0]);
    code.seta(dest[0]);
}
// f64 "greater than" compare via ucomisd; dest receives the seta result.
// A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomisd reports unordered as ZF=PF=CF=1, so seta yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovDouble(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomisd(lhs_reg, rhs[0]);
    code.seta(dest[0]);
}
// Ordered f16 "less than or equal": emits the unordered variant, then
// overwrites dest with 0 when the parity flag left by the compare is set
// (NaN operand).
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f32 "less than or equal": emits the unordered variant, then
// overwrites dest with 0 when the parity flag left by the compare is set
// (NaN operand).
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f64 "less than or equal": emits the unordered variant, then
// overwrites dest with 0 when the parity flag left by the compare is set
// (NaN operand).
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// f16 "less than or equal" compare: both operands are widened to f32 in
// scratch xmm registers, compared with ucomiss, and dest receives the setbe
// result.
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_f32 = ctx.TempXmmReg();
    const Xmm rhs_f32 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs_f32, lhs[0]);
    EmitInlineF16ToF32(ctx, rhs_f32, rhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_f32, rhs_f32);
    code.setbe(dest[0]);
}
// f32 "less than or equal" compare via ucomiss; dest receives the setbe
// result. A memory lhs is loaded into an unreserved scratch xmm register
// first; a register lhs is used in place.
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovFloat(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_reg, rhs[0]);
    code.setbe(dest[0]);
}
// f64 "less than or equal" compare via ucomisd; dest receives the setbe
// result. A memory lhs is loaded into an unreserved scratch xmm register
// first; a register lhs is used in place.
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovDouble(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomisd(lhs_reg, rhs[0]);
    code.setbe(dest[0]);
}
// Ordered f16 "greater than or equal": emits the unordered variant (which
// performs the compare and writes dest), then overwrites dest with 0 when the
// parity flag left by that compare is set (NaN operand).
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    Label not_nan;
    EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs);
    ctx.Code().jnp(not_nan);
    ctx.Code().mov(dest[0], 0);
    ctx.Code().L(not_nan);
}
// Ordered f32 "greater than or equal": emits the unordered variant, then
// overwrites dest with 0 when the parity flag left by the compare is set
// (NaN operand).
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// Ordered f64 "greater than or equal": emits the unordered variant, then
// overwrites dest with 0 when the parity flag left by the compare is set
// (NaN operand).
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs);
    auto& code = ctx.Code();
    Label ordered;
    code.jnp(ordered);
    code.mov(dest[0], 0);
    code.L(ordered);
}
// f16 "greater than or equal" compare: both operands are widened to f32 in
// scratch xmm registers, compared with ucomiss, and dest receives the setae
// result.
// NOTE(review): ucomiss reports unordered as ZF=PF=CF=1, so setae yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_f32 = ctx.TempXmmReg();
    const Xmm rhs_f32 = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, lhs_f32, lhs[0]);
    EmitInlineF16ToF32(ctx, rhs_f32, rhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_f32, rhs_f32);
    code.setae(dest[0]);
}
// f32 "greater than or equal" compare via ucomiss; dest receives the setae
// result. A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomiss reports unordered as ZF=PF=CF=1, so setae yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovFloat(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomiss(lhs_reg, rhs[0]);
    code.setae(dest[0]);
}
// f64 "greater than or equal" compare via ucomisd; dest receives the setae
// result. A memory lhs is loaded into an unreserved scratch xmm register first.
// NOTE(review): ucomisd reports unordered as ZF=PF=CF=1, so setae yields 0 for
// NaN inputs - confirm that is the intended "unordered" result.
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
    const Xmm lhs_reg = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
    MovDouble(ctx, lhs_reg, lhs[0]);
    auto& code = ctx.Code();
    code.ucomisd(lhs_reg, rhs[0]);
    code.setae(dest[0]);
}
// f16 NaN test: widens the operand to f32 in a scratch xmm register, compares
// it with itself, and stores the parity flag (setp) into dest.
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) {
    const Xmm widened = ctx.TempXmmReg();
    EmitInlineF16ToF32(ctx, widened, op[0]);
    auto& code = ctx.Code();
    code.ucomiss(widened, widened);
    code.setp(dest[0]);
}
// f32 NaN test: compares the operand with itself using ucomiss and stores the
// parity flag (setp) into dest.
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) {
    // The scratch register is chosen from the *source* operand, matching the
    // other compare emitters in this file: a memory source is loaded into an
    // unreserved temp, a register source is compared in place. Deriving it from
    // dest (the boolean result operand) would reinterpret a non-xmm register as
    // an xmm register and overwrite it.
    Xmm tmp = op[0].isMEM() ? ctx.TempXmmReg(false) : op[0].getReg().cvt128();
    MovFloat(ctx, tmp, op[0]);
    ctx.Code().ucomiss(tmp, tmp);
    ctx.Code().setp(dest[0]);
}
// f64 NaN test: compares the operand with itself using ucomisd and stores the
// parity flag (setp) into dest.
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) {
    // The scratch register is chosen from the *source* operand, matching the
    // other compare emitters in this file: a memory source is loaded into an
    // unreserved temp, a register source is compared in place. Deriving it from
    // dest (the boolean result operand) would reinterpret a non-xmm register as
    // an xmm register and overwrite it.
    Xmm tmp = op[0].isMEM() ? ctx.TempXmmReg(false) : op[0].getReg().cvt128();
    MovDouble(ctx, tmp, op[0]);
    ctx.Code().ucomisd(tmp, tmp);
    ctx.Code().setp(dest[0]);
}
// Stub: no code is emitted for FPIsInf32; calling this always throws
// NotImplementedException.
void EmitFPIsInf32(EmitContext& ctx) {
throw NotImplementedException("FPIsInf32");
}
// Stub: no code is emitted for FPIsInf64; calling this always throws
// NotImplementedException.
void EmitFPIsInf64(EmitContext& ctx) {
throw NotImplementedException("FPIsInf64");
}
// Emits nothing: reaching this emitter is treated as a logic error and trips
// UNREACHABLE(). Presumably the opcode is eliminated before this backend runs
// - TODO confirm.
void EmitFPCmpClass32(EmitContext&) {
UNREACHABLE();
}
}

View File

@ -157,7 +157,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper
void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
@ -227,103 +227,103 @@ void EmitPackUint2_10_10_10(EmitContext& ctx);
void EmitUnpackUint2_10_10_10(EmitContext& ctx);
void EmitPackSint2_10_10_10(EmitContext& ctx);
void EmitUnpackSint2_10_10_10(EmitContext& ctx);
Id EmitFPAbs16(EmitContext& ctx, Id value);
Id EmitFPAbs32(EmitContext& ctx, Id value);
Id EmitFPAbs64(EmitContext& ctx, Id value);
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPNeg16(EmitContext& ctx, Id value);
Id EmitFPNeg32(EmitContext& ctx, Id value);
Id EmitFPNeg64(EmitContext& ctx, Id value);
Id EmitFPSin(EmitContext& ctx, Id value);
Id EmitFPCos(EmitContext& ctx, Id value);
Id EmitFPExp2(EmitContext& ctx, Id value);
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
Id EmitFPLog2(EmitContext& ctx, Id value);
Id EmitFPRecip32(EmitContext& ctx, Id value);
Id EmitFPRecip64(EmitContext& ctx, Id value);
Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
Id EmitFPSqrt(EmitContext& ctx, Id value);
Id EmitFPSaturate16(EmitContext& ctx, Id value);
Id EmitFPSaturate32(EmitContext& ctx, Id value);
Id EmitFPSaturate64(EmitContext& ctx, Id value);
Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
Id EmitFPRoundEven16(EmitContext& ctx, Id value);
Id EmitFPRoundEven32(EmitContext& ctx, Id value);
Id EmitFPRoundEven64(EmitContext& ctx, Id value);
Id EmitFPFloor16(EmitContext& ctx, Id value);
Id EmitFPFloor32(EmitContext& ctx, Id value);
Id EmitFPFloor64(EmitContext& ctx, Id value);
Id EmitFPCeil16(EmitContext& ctx, Id value);
Id EmitFPCeil32(EmitContext& ctx, Id value);
Id EmitFPCeil64(EmitContext& ctx, Id value);
Id EmitFPTrunc16(EmitContext& ctx, Id value);
Id EmitFPTrunc32(EmitContext& ctx, Id value);
Id EmitFPTrunc64(EmitContext& ctx, Id value);
Id EmitFPFract32(EmitContext& ctx, Id value);
Id EmitFPFract64(EmitContext& ctx, Id value);
Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPIsNan16(EmitContext& ctx, Id value);
Id EmitFPIsNan32(EmitContext& ctx, Id value);
Id EmitFPIsNan64(EmitContext& ctx, Id value);
Id EmitFPIsInf32(EmitContext& ctx, Id value);
Id EmitFPIsInf64(EmitContext& ctx, Id value);
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSin(EmitContext& ctx);
void EmitFPCos(EmitContext& ctx);
void EmitFPExp2(EmitContext& ctx);
void EmitFPLdexp(EmitContext& ctx);
void EmitFPLog2(EmitContext& ctx);
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSaturate16(EmitContext& ctx);
void EmitFPSaturate32(EmitContext& ctx);
void EmitFPSaturate64(EmitContext& ctx);
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPTrunc16(EmitContext& ctx);
void EmitFPTrunc32(EmitContext& ctx);
void EmitFPTrunc64(EmitContext& ctx);
void EmitFPFract32(EmitContext& ctx);
void EmitFPFract64(EmitContext& ctx);
void EmitFPFrexpSig32(EmitContext& ctx);
void EmitFPFrexpSig64(EmitContext& ctx);
void EmitFPFrexpExp32(EmitContext& ctx);
void EmitFPFrexpExp64(EmitContext& ctx);
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsInf32(EmitContext& ctx);
void EmitFPIsInf64(EmitContext& ctx);
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);

View File

@ -48,6 +48,16 @@ Xmm& EmitContext::TempXmmReg(bool reserve) {
return reg;
}
// Drops the most recent temporary general-purpose register by decrementing
// the temp GP index. Asserts that the index is above zero first.
void EmitContext::PopTempGPReg() {
    ASSERT(temp_gp_reg_index > 0);
    --temp_gp_reg_index;
}
// Drops the most recent temporary xmm register by decrementing the temp xmm
// index. Asserts that the index is above zero first.
void EmitContext::PopTempXmmReg() {
    ASSERT(temp_xmm_reg_index > 0);
    --temp_xmm_reg_index;
}
// Returns the operands recorded for `inst` in inst_to_operands, using the
// container's checked at() lookup.
const Operands& EmitContext::Def(IR::Inst* inst) {
    const Operands& operands = inst_to_operands.at(inst);
    return operands;
}

View File

@ -40,7 +40,9 @@ public:
[[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
[[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
void PopTempGPReg();
void PopTempXmmReg();
[[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;}
[[nodiscard]] const Operands& Def(IR::Inst* inst);

View File

@ -159,6 +159,9 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) {
void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src == dst) {
return;
}
if (src.isMEM() && dst.isMEM()) {
Reg tmp = ctx.TempGPReg(false).cvt32();
c.mov(tmp, src);
@ -176,6 +179,9 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src == dst) {
return;
}
if (src.isMEM() && dst.isMEM()) {
const Reg64& tmp = ctx.TempGPReg(false);
c.mov(tmp, src);
@ -193,6 +199,9 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src == dst) {
return;
}
Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
if (src.getBit() == dst.getBit()) {
c.mov(tmp, src);
@ -288,4 +297,144 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
}
}
// Emits inline integer code that widens a half-precision value (read from
// `src` with movzx) to its single-precision bit pattern.
//
// The value is split into sign / 5-bit exponent / 10-bit mantissa and then
// handled per class:
//   - zero:       result is the sign bit only (signed zero is preserved)
//   - subnormal:  mantissa is shifted left until bit 10 is set, adjusting the
//                 exponent, then treated as a normal number
//   - inf / NaN:  exponent forced to all ones, mantissa shifted into place
//   - normal:     exponent re-biased by +112 (127 - 15)
//
// The result is written to `dest` with mov (memory) or movd (xmm register).
// Three temporary GP registers are reserved for the duration and popped
// before returning.
void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();
    c.movzx(mantissa, src);

    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);

    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);

    // Normalize subnormal number
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);

    // Zero: mantissa is 0 here, so OR-ing in the sign yields +0.0 or -0.0.
    // (AND-ing would always produce +0.0 and drop the sign of -0.0.)
    c.L(zero_mantissa);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);

    // Infinite or NaN
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);

    // Normal number
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }

    // Release the three temporaries reserved above.
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
}
// Emits inline integer code that narrows a single-precision value to a
// half-precision bit pattern (mantissa is truncated, not rounded).
//
// `src` is read with mov (memory) or movd (xmm register). The result is
// written to `dest` with mov when it is memory; when `dest` is a register the
// work is done directly in its 32-bit view and masked to 16 bits.
//
// Classes handled:
//   - zero / f32 subnormal (exponent field 0): signed zero
//   - re-biased exponent <= 0 (underflow):     +0
//   - re-biased exponent >= 0x1F (overflow):   signed infinity
//   - otherwise:                               sign | exponent << 10 | mantissa >> 13
// NOTE(review): f32 NaN takes the overflow path and therefore becomes
// infinity; underflow discards the sign. Confirm whether callers need either.
void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    const bool dest_is_mem = dest.isMEM();
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    // A third temporary is only reserved when dest is memory.
    Reg mantissa = dest_is_mem ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }

    // Extract sign, exponent, and mantissa. The 23-bit mantissa is left
    // unshifted here; the normal path drops the low 13 bits in one step so
    // exactly 10 bits remain and nothing spills into the exponent field.
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);

    // Subnormal numbers will be zero
    c.test(exponent, exponent);
    c.jz(zero_exp);

    // Check for overflow and underflow
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);

    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Underflow
    c.L(underflow);
    c.xor_(mantissa, mantissa);
    c.jmp(done);

    // Overflow
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Zero value: result is just the sign bit (+0.0 or -0.0).
    c.L(zero_exp);
    c.mov(mantissa, sign);

    c.L(done);
    if (dest_is_mem) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }

    // Pop exactly as many temporaries as were reserved above: sign and
    // exponent always, the mantissa scratch only when dest is memory.
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    if (dest_is_mem) {
        ctx.PopTempGPReg();
    }
}
} // namespace Shader::Backend::X64

View File

@ -19,5 +19,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
} // namespace Shader::Backend::X64