mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-01 15:02:40 +00:00
Floating point and fixes
This commit is contained in:
parent
3d971701db
commit
258a0225c2
@ -905,6 +905,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
|
||||
src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
|
||||
|
@ -16,32 +16,24 @@ void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
}
|
||||
|
||||
void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
if (dest[0].isMEM() && src[0].isMEM()) {
|
||||
Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
|
||||
ctx.Code().mov(tmp, src[0]);
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
} else if (src[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], src[0]);
|
||||
if (src[0].isMEM()) {
|
||||
MovGP(ctx, dest[0], src[0]);
|
||||
} else if (dest[0].isMEM()) {
|
||||
ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128());
|
||||
} else {
|
||||
ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128());
|
||||
ctx.Code().mov(dest[0], dword[rsp - 4]);
|
||||
MovGP(ctx, dest[0], dword[rsp - 4]);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
if (dest[0].isMEM() && src[0].isMEM()) {
|
||||
Reg tmp = ctx.TempGPReg(false);
|
||||
ctx.Code().mov(tmp, src[0]);
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
} else if (src[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], src[0]);
|
||||
if (src[0].isMEM()) {
|
||||
MovGP(ctx, dest[0], src[0]);
|
||||
} else if (dest[0].isMEM()) {
|
||||
ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128());
|
||||
} else {
|
||||
ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128());
|
||||
ctx.Code().mov(dest[0], qword[rsp - 8]);
|
||||
MovGP(ctx, dest[0], qword[rsp - 8]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -50,31 +42,23 @@ void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
}
|
||||
|
||||
void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
if (dest[0].isMEM() && src[0].isMEM()) {
|
||||
Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
|
||||
ctx.Code().mov(tmp, src[0]);
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
} else if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
MovGP(ctx, dest[0], src[0]);
|
||||
} else if (src[0].isMEM()) {
|
||||
ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress());
|
||||
} else {
|
||||
ctx.Code().mov(dword[rsp - 4], src[0]);
|
||||
MovGP(ctx, dword[rsp - 4], src[0]);
|
||||
ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
if (dest[0].isMEM() && src[0].isMEM()) {
|
||||
Reg tmp = ctx.TempGPReg(false);
|
||||
ctx.Code().mov(tmp, src[0]);
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
} else if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
MovGP(ctx, dest[0], src[0]);
|
||||
} else if (src[0].isMEM()) {
|
||||
ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress());
|
||||
} else {
|
||||
ctx.Code().mov(qword[rsp - 8], src[0].getReg());
|
||||
MovGP(ctx, qword[rsp - 8], src[0]);
|
||||
ctx.Code().mov(dest[0].getReg().cvt128(), qword[rsp - 8]);
|
||||
}
|
||||
}
|
||||
@ -82,35 +66,27 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM());
|
||||
Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg();
|
||||
ctx.Code().mov(tmp, src[0]);
|
||||
MovGP(ctx, tmp, src[1]);
|
||||
ctx.Code().shl(tmp, 32);
|
||||
ctx.Code().or_(tmp, src[0]);
|
||||
if (is_mem) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
|
||||
if (src[0].isMEM()) {
|
||||
ctx.Code().mov(src0, src[0]);
|
||||
}
|
||||
MovGP(ctx, src0, src[0]);
|
||||
Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64);
|
||||
ctx.Code().mov(dest1, src0);
|
||||
MovGP(ctx, dest1, src0);
|
||||
ctx.Code().shr(dest1, 32);
|
||||
if (dest[1].isMEM()) {
|
||||
ctx.Code().mov(dest[1], dest1.cvt32());
|
||||
}
|
||||
ctx.Code().mov(dest[0], src0.cvt32());
|
||||
MovGP(ctx, dest[1], dest1);
|
||||
MovGP(ctx, dest[0], src0.cvt32());
|
||||
}
|
||||
|
||||
void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, src[0]);
|
||||
ctx.Code().pinsrd(tmp, src[1], 1);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().movss(dest[0].getAddress(), tmp);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitPackUnorm2x16(EmitContext& ctx) {
|
||||
|
@ -19,7 +19,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg)
|
||||
|
||||
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
|
||||
Reg& tmp = ctx.TempGPReg();
|
||||
ctx.Code().mov(tmp, offset[0]);
|
||||
MovGP(ctx, tmp, offset[0]);
|
||||
ctx.Code().shl(tmp, 2);
|
||||
ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
|
||||
MovGP(ctx, ptr[tmp], value[0]);
|
||||
@ -59,7 +59,7 @@ void EmitGetGotoVariable(EmitContext&) {
|
||||
|
||||
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
|
||||
Reg& tmp = ctx.TempGPReg(false);
|
||||
ctx.Code().mov(tmp, base[1]);
|
||||
MovGP(ctx, tmp, base[1]);
|
||||
ctx.Code().shl(tmp, 32);
|
||||
ctx.Code().or_(tmp, base[0]);
|
||||
if (offset[0].isMEM()) {
|
||||
|
@ -1,7 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
|
||||
|
||||
@ -10,169 +9,27 @@ namespace Shader::Backend::X64 {
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
namespace {
|
||||
|
||||
static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
|
||||
CodeGenerator& c = ctx.Code();
|
||||
Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
|
||||
Reg sign = ctx.TempGPReg().cvt32();
|
||||
Reg exponent = ctx.TempGPReg().cvt32();
|
||||
Reg mantissa = ctx.TempGPReg().cvt32();
|
||||
|
||||
c.movzx(mantissa, src);
|
||||
|
||||
// Extract sign, exponent, and mantissa
|
||||
c.mov(sign, mantissa);
|
||||
c.and_(sign, 0x8000);
|
||||
c.shl(sign, 16);
|
||||
c.mov(exponent, mantissa);
|
||||
c.and_(exponent, 0x7C00);
|
||||
c.shr(exponent, 10);
|
||||
c.and_(mantissa, 0x03FF);
|
||||
|
||||
// Check for zero exponent and mantissa
|
||||
c.test(exponent, exponent);
|
||||
c.jnz(nonzero_exp);
|
||||
c.test(mantissa, mantissa);
|
||||
c.jz(zero_mantissa);
|
||||
|
||||
// Nromalize subnormal number
|
||||
c.mov(exponent, 1);
|
||||
c.L(norm_loop);
|
||||
c.test(mantissa, 0x400);
|
||||
c.jnz(norm_done);
|
||||
c.shl(mantissa, 1);
|
||||
c.dec(exponent);
|
||||
c.jmp(norm_loop);
|
||||
c.L(norm_done);
|
||||
c.and_(mantissa, 0x03FF);
|
||||
c.jmp(normal);
|
||||
|
||||
// Zero mantissa
|
||||
c.L(zero_mantissa);
|
||||
c.and_(mantissa, sign);
|
||||
c.jmp(done);
|
||||
|
||||
// Non-zero exponent
|
||||
c.L(nonzero_exp);
|
||||
c.cmp(exponent, 0x1F);
|
||||
c.jne(normal);
|
||||
|
||||
// Infinite or NaN
|
||||
c.shl(mantissa, 13);
|
||||
c.or_(mantissa, sign);
|
||||
c.or_(mantissa, 0x7F800000);
|
||||
c.jmp(done);
|
||||
|
||||
// Normal number
|
||||
c.L(normal);
|
||||
c.add(exponent, 112);
|
||||
c.shl(exponent, 23);
|
||||
c.shl(mantissa, 13);
|
||||
c.or_(mantissa, sign);
|
||||
c.or_(mantissa, exponent);
|
||||
|
||||
c.L(done);
|
||||
if (dest.isMEM()) {
|
||||
c.mov(dest, mantissa);
|
||||
} else {
|
||||
c.movd(dest.getReg().cvt128(), mantissa);
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
|
||||
CodeGenerator& c = ctx.Code();
|
||||
Label zero_exp, underflow, overflow, done;
|
||||
Reg sign = ctx.TempGPReg().cvt32();
|
||||
Reg exponent = ctx.TempGPReg().cvt32();
|
||||
Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
|
||||
|
||||
if (src.isMEM()) {
|
||||
c.mov(mantissa, src);
|
||||
} else {
|
||||
c.movd(mantissa, src.getReg().cvt128());
|
||||
}
|
||||
|
||||
// Extract sign, exponent, and mantissa
|
||||
c.mov(exponent, mantissa);
|
||||
c.mov(sign, mantissa);
|
||||
c.and_(exponent, 0x7F800000);
|
||||
c.and_(mantissa, 0x007FFFFF);
|
||||
c.shr(exponent, 23);
|
||||
c.shl(mantissa, 3);
|
||||
c.shr(sign, 16);
|
||||
c.and_(sign, 0x8000);
|
||||
|
||||
// Subnormal numbers will be zero
|
||||
c.test(exponent, exponent);
|
||||
c.jz(zero_exp);
|
||||
|
||||
// Check for overflow and underflow
|
||||
c.sub(exponent, 112);
|
||||
c.cmp(exponent, 0);
|
||||
c.jle(underflow);
|
||||
c.cmp(exponent, 0x1F);
|
||||
c.jge(overflow);
|
||||
|
||||
// Normal number
|
||||
c.shl(exponent, 10);
|
||||
c.shr(mantissa, 13);
|
||||
c.or_(mantissa, exponent);
|
||||
c.or_(mantissa, sign);
|
||||
c.jmp(done);
|
||||
|
||||
// Undeflow
|
||||
c.L(underflow);
|
||||
c.xor_(mantissa, mantissa);
|
||||
c.jmp(done);
|
||||
|
||||
// Overflow
|
||||
c.L(overflow);
|
||||
c.mov(mantissa, 0x7C00);
|
||||
c.or_(mantissa, sign);
|
||||
c.jmp(done);
|
||||
|
||||
// Zero value
|
||||
c.L(zero_exp);
|
||||
c.and_(mantissa, sign);
|
||||
|
||||
c.L(done);
|
||||
if (dest.isMEM()) {
|
||||
c.mov(dest, mantissa);
|
||||
} else {
|
||||
c.and_(mantissa, 0xFFFF);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp_xmm = ctx.TempXmmReg(false);
|
||||
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
|
||||
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
|
||||
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
|
||||
ctx.Code().and_(tmp_reg, 0xFFFF);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_reg.cvt16());
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp_reg);
|
||||
}
|
||||
|
||||
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
|
||||
ctx.Code().cvttss2si(tmp, src[0]);
|
||||
ctx.Code().and_(tmp, 0xFFFF);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp.cvt16());
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
|
||||
ctx.Code().cvttsd2si(tmp, src[0]);
|
||||
ctx.Code().and_(tmp, 0xFFFF);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp.cvt16());
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -180,25 +37,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
|
||||
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
|
||||
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_reg);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp_reg);
|
||||
}
|
||||
|
||||
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
|
||||
ctx.Code().cvttss2si(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
|
||||
ctx.Code().cvttsd2si(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -206,25 +57,19 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg();
|
||||
EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
|
||||
ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_reg);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp_reg);
|
||||
}
|
||||
|
||||
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
|
||||
ctx.Code().cvttss2si(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
|
||||
ctx.Code().cvttsd2si(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -282,17 +127,13 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtsd2ss(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtss2sd(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -344,9 +185,7 @@ void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& sr
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().movsx(tmp_reg, src[0]);
|
||||
ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_xmm);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -354,25 +193,19 @@ void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().movsx(tmp_reg, src[0]);
|
||||
ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_xmm);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtsi2ss(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtsi2ss(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -396,9 +229,7 @@ void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& sr
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().movsx(tmp_reg, src[0]);
|
||||
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_xmm);
|
||||
}
|
||||
MovDouble(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
@ -406,25 +237,19 @@ void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& s
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().movsx(tmp_reg, src[0]);
|
||||
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp_xmm);
|
||||
}
|
||||
MovDouble(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtsi2sd(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().cvtsi2sd(tmp, src[0]);
|
||||
if (dest[0].isMEM()) {
|
||||
ctx.Code().mov(dest[0], tmp);
|
||||
}
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
|
@ -0,0 +1,723 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
|
||||
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
|
||||
|
||||
namespace Shader::Backend::X64 {
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
|
||||
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16();
|
||||
MovGP(ctx, tmp, src[0]);
|
||||
ctx.Code().and_(tmp, 0x7FFF);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg reg_tmp = ctx.TempXmmReg(false);
|
||||
Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().mov(reg_tmp, 0x7FFFFFFF);
|
||||
ctx.Code().movd(xmm_tmp, reg_tmp);
|
||||
ctx.Code().andps(xmm_tmp, src[0]);
|
||||
MovFloat(ctx, dest[0], xmm_tmp);
|
||||
}
|
||||
|
||||
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
|
||||
Reg reg_tmp = ctx.TempGPReg(false);
|
||||
Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF);
|
||||
ctx.Code().movq(xmm_tmp, reg_tmp);
|
||||
ctx.Code().andpd(xmm_tmp, src[0]);
|
||||
MovFloat(ctx, dest[0], xmm_tmp);
|
||||
}
|
||||
|
||||
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, op1[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, op2[0]);
|
||||
ctx.Code().addss(tmp1, tmp2);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp1);
|
||||
}
|
||||
|
||||
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().addss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op1[0]);
|
||||
ctx.Code().addsd(tmp, op2[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().subss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
Xmm tmp3 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, op1[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, op2[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp3, op3[0]);
|
||||
ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp3);
|
||||
}
|
||||
|
||||
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
|
||||
Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128();
|
||||
Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp1, op3[0]);
|
||||
MovFloat(ctx, tmp2, op1[0]);
|
||||
MovFloat(ctx, tmp3, op2[0]);
|
||||
ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2);
|
||||
MovFloat(ctx, dest[0], tmp3);
|
||||
}
|
||||
|
||||
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
|
||||
Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
|
||||
Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128();
|
||||
Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp1, op3[0]);
|
||||
MovDouble(ctx, tmp2, op1[0]);
|
||||
MovDouble(ctx, tmp3, op2[0]);
|
||||
ctx.Code().vfmadd132sd(tmp3, tmp1, tmp2);
|
||||
MovDouble(ctx, dest[0], tmp3);
|
||||
}
|
||||
|
||||
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
|
||||
if (is_legacy) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
MovFloat(ctx, tmp1, op1[0]);
|
||||
MovFloat(ctx, tmp2, op1[0]);
|
||||
ctx.Code().maxss(tmp2, op2[0]);
|
||||
ctx.Code().cmpunordss(tmp1, tmp1);
|
||||
ctx.Code().andps(tmp1, op2[0]);
|
||||
ctx.Code().orps(tmp2, tmp1);
|
||||
MovFloat(ctx, dest[0], tmp2);
|
||||
} else {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().maxss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op1[0]);
|
||||
ctx.Code().maxsd(tmp, op2[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
|
||||
if (is_legacy) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
MovFloat(ctx, tmp1, op1[0]);
|
||||
MovFloat(ctx, tmp2, op1[0]);
|
||||
ctx.Code().minss(tmp2, op2[0]);
|
||||
ctx.Code().cmpunordss(tmp1, tmp1);
|
||||
ctx.Code().andps(tmp1, op2[0]);
|
||||
ctx.Code().orps(tmp2, tmp1);
|
||||
MovFloat(ctx, dest[0], tmp2);
|
||||
} else {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().minss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op1[0]);
|
||||
ctx.Code().minsd(tmp, op2[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, op1[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, op2[0]);
|
||||
ctx.Code().mulss(tmp1, tmp2);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp1);
|
||||
}
|
||||
|
||||
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().mulss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op1[0]);
|
||||
ctx.Code().mulsd(tmp, op2[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op1[0]);
|
||||
ctx.Code().divss(tmp, op2[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op1[0]);
|
||||
ctx.Code().divsd(tmp, op2[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16();
|
||||
MovGP(ctx, tmp, op1[0]);
|
||||
ctx.Code().xor_(tmp, 0x8000);
|
||||
MovGP(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
Reg tmp_reg = ctx.TempGPReg(false).cvt32();
|
||||
ctx.Code().mov(tmp_reg, 0x80000000);
|
||||
ctx.Code().movd(tmp_xmm, tmp_reg);
|
||||
ctx.Code().xorps(tmp_xmm, op1[0]);
|
||||
MovFloat(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
Reg tmp_reg = ctx.TempXmmReg(false);
|
||||
ctx.Code().mov(tmp_reg, 0x8000000000000000);
|
||||
ctx.Code().movq(tmp_xmm, tmp_reg);
|
||||
ctx.Code().xorpd(tmp_xmm, op1[0]);
|
||||
MovDouble(ctx, dest[0], tmp_xmm);
|
||||
|
||||
}
|
||||
|
||||
void EmitFPSin(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPSin");
|
||||
}
|
||||
|
||||
void EmitFPCos(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPCos");
|
||||
}
|
||||
|
||||
void EmitFPExp2(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPExp2");
|
||||
}
|
||||
|
||||
void EmitFPLdexp(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPLdexp");
|
||||
}
|
||||
|
||||
void EmitFPLog2(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPLog2");
|
||||
}
|
||||
|
||||
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().rcpss(tmp, op1[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
Reg tmp_reg = ctx.TempGPReg(false);
|
||||
ctx.Code().mov(tmp_reg, 1);
|
||||
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
|
||||
ctx.Code().divsd(tmp_xmm, op1[0]);
|
||||
MovDouble(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().rsqrtss(tmp, op1[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
Reg tmp_reg = ctx.TempGPReg(false);
|
||||
ctx.Code().mov(tmp_reg, 1);
|
||||
ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
|
||||
ctx.Code().divsd(tmp_xmm, op1[0]);
|
||||
ctx.Code().sqrtsd(tmp_xmm, tmp_xmm);
|
||||
MovDouble(ctx, dest[0], tmp_xmm);
|
||||
}
|
||||
|
||||
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().sqrtss(tmp, op1[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPSaturate16(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPSaturate16");
|
||||
}
|
||||
|
||||
void EmitFPSaturate32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPSaturate32");
|
||||
}
|
||||
|
||||
void EmitFPSaturate64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPSaturate64");
|
||||
}
|
||||
|
||||
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
Xmm tmp3 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, op[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, min[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp3, max[0]);
|
||||
ctx.Code().maxss(tmp1, tmp2);
|
||||
ctx.Code().minss(tmp1, tmp3);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp1);
|
||||
}
|
||||
|
||||
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op[0]);
|
||||
ctx.Code().maxss(tmp, min[0]);
|
||||
ctx.Code().minss(tmp, max[0]);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op[0]);
|
||||
ctx.Code().maxsd(tmp, min[0]);
|
||||
ctx.Code().minsd(tmp, max[0]);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp, op1[0]);
|
||||
ctx.Code().roundss(tmp, tmp, 0x00);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundss(tmp, op1[0], 0x00);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundsd(tmp, op1[0], 0x00);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp, op1[0]);
|
||||
ctx.Code().roundss(tmp, tmp, 0x01);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundss(tmp, op1[0], 0x01);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundsd(tmp, op1[0], 0x01);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp, op1[0]);
|
||||
ctx.Code().roundss(tmp, tmp, 0x02);
|
||||
EmitInlineF32ToF16(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundss(tmp, op1[0], 0x02);
|
||||
MovFloat(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
ctx.Code().roundsd(tmp, op1[0], 0x02);
|
||||
MovDouble(ctx, dest[0], tmp);
|
||||
}
|
||||
|
||||
void EmitFPTrunc16(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPTrunc16");
|
||||
}
|
||||
|
||||
void EmitFPTrunc32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPTrunc32");
|
||||
}
|
||||
|
||||
void EmitFPTrunc64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPTrunc64");
|
||||
}
|
||||
|
||||
void EmitFPFract32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFract32");
|
||||
}
|
||||
|
||||
void EmitFPFract64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFract64");
|
||||
}
|
||||
|
||||
void EmitFPFrexpSig32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFrexpSig32");
|
||||
}
|
||||
|
||||
void EmitFPFrexpSig64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFrexpSig64");
|
||||
}
|
||||
|
||||
void EmitFPFrexpExp32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFrexpExp32");
|
||||
}
|
||||
|
||||
void EmitFPFrexpExp64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPFrexpExp64");
|
||||
}
|
||||
|
||||
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordEqual16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordEqual32(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordEqual64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().sete(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().sete(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().sete(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordNotEqual16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().setne(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().setne(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().setne(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThan16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThan32(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThan64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().setb(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().setb(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().setb(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().seta(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().seta(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().seta(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().setbe(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().setbe(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().setbe(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
ctx.Code().vfpclassss(tmp1, tmp2);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Label not_nan;
|
||||
EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs);
|
||||
ctx.Code().jnp(not_nan);
|
||||
ctx.Code().mov(dest[0], 0);
|
||||
ctx.Code().L(not_nan);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp1 = ctx.TempXmmReg();
|
||||
Xmm tmp2 = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
|
||||
EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
|
||||
ctx.Code().ucomiss(tmp1, tmp2);
|
||||
ctx.Code().setae(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomiss(tmp, rhs[0]);
|
||||
ctx.Code().setae(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
|
||||
Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, lhs[0]);
|
||||
ctx.Code().ucomisd(tmp, rhs[0]);
|
||||
ctx.Code().setae(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
Xmm tmp = ctx.TempXmmReg();
|
||||
EmitInlineF16ToF32(ctx, tmp, op[0]);
|
||||
ctx.Code().ucomiss(tmp, tmp);
|
||||
ctx.Code().setp(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovFloat(ctx, tmp, op[0]);
|
||||
ctx.Code().ucomiss(tmp, tmp);
|
||||
ctx.Code().setp(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) {
|
||||
Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
|
||||
MovDouble(ctx, tmp, op[0]);
|
||||
ctx.Code().ucomisd(tmp, tmp);
|
||||
ctx.Code().setp(dest[0]);
|
||||
}
|
||||
|
||||
void EmitFPIsInf32(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPIsInf32");
|
||||
}
|
||||
|
||||
void EmitFPIsInf64(EmitContext& ctx) {
|
||||
throw NotImplementedException("FPIsInf64");
|
||||
}
|
||||
|
||||
void EmitFPCmpClass32(EmitContext&) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
@ -157,7 +157,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper
|
||||
void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
|
||||
void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
|
||||
void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
|
||||
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
|
||||
void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const OpEmitFPAbs16erands& src1, const Operands& src2);
|
||||
void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
|
||||
void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
|
||||
void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
|
||||
@ -227,103 +227,103 @@ void EmitPackUint2_10_10_10(EmitContext& ctx);
|
||||
void EmitUnpackUint2_10_10_10(EmitContext& ctx);
|
||||
void EmitPackSint2_10_10_10(EmitContext& ctx);
|
||||
void EmitUnpackSint2_10_10_10(EmitContext& ctx);
|
||||
Id EmitFPAbs16(EmitContext& ctx, Id value);
|
||||
Id EmitFPAbs32(EmitContext& ctx, Id value);
|
||||
Id EmitFPAbs64(EmitContext& ctx, Id value);
|
||||
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
|
||||
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
|
||||
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
|
||||
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitFPNeg16(EmitContext& ctx, Id value);
|
||||
Id EmitFPNeg32(EmitContext& ctx, Id value);
|
||||
Id EmitFPNeg64(EmitContext& ctx, Id value);
|
||||
Id EmitFPSin(EmitContext& ctx, Id value);
|
||||
Id EmitFPCos(EmitContext& ctx, Id value);
|
||||
Id EmitFPExp2(EmitContext& ctx, Id value);
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip64(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
|
||||
Id EmitFPSqrt(EmitContext& ctx, Id value);
|
||||
Id EmitFPSaturate16(EmitContext& ctx, Id value);
|
||||
Id EmitFPSaturate32(EmitContext& ctx, Id value);
|
||||
Id EmitFPSaturate64(EmitContext& ctx, Id value);
|
||||
Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||
Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||
Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||
Id EmitFPRoundEven16(EmitContext& ctx, Id value);
|
||||
Id EmitFPRoundEven32(EmitContext& ctx, Id value);
|
||||
Id EmitFPRoundEven64(EmitContext& ctx, Id value);
|
||||
Id EmitFPFloor16(EmitContext& ctx, Id value);
|
||||
Id EmitFPFloor32(EmitContext& ctx, Id value);
|
||||
Id EmitFPFloor64(EmitContext& ctx, Id value);
|
||||
Id EmitFPCeil16(EmitContext& ctx, Id value);
|
||||
Id EmitFPCeil32(EmitContext& ctx, Id value);
|
||||
Id EmitFPCeil64(EmitContext& ctx, Id value);
|
||||
Id EmitFPTrunc16(EmitContext& ctx, Id value);
|
||||
Id EmitFPTrunc32(EmitContext& ctx, Id value);
|
||||
Id EmitFPTrunc64(EmitContext& ctx, Id value);
|
||||
Id EmitFPFract32(EmitContext& ctx, Id value);
|
||||
Id EmitFPFract64(EmitContext& ctx, Id value);
|
||||
Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
|
||||
Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
|
||||
Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
|
||||
Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
|
||||
Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPIsNan16(EmitContext& ctx, Id value);
Id EmitFPIsNan32(EmitContext& ctx, Id value);
Id EmitFPIsNan64(EmitContext& ctx, Id value);
Id EmitFPIsInf32(EmitContext& ctx, Id value);
Id EmitFPIsInf64(EmitContext& ctx, Id value);
void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false);
void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSin(EmitContext& ctx);
void EmitFPCos(EmitContext& ctx);
void EmitFPExp2(EmitContext& ctx);
void EmitFPLdexp(EmitContext& ctx);
void EmitFPLog2(EmitContext& ctx);
void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPSaturate16(EmitContext& ctx);
void EmitFPSaturate32(EmitContext& ctx);
void EmitFPSaturate64(EmitContext& ctx);
void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1);
void EmitFPTrunc16(EmitContext& ctx);
void EmitFPTrunc32(EmitContext& ctx);
void EmitFPTrunc64(EmitContext& ctx);
void EmitFPFract32(EmitContext& ctx);
void EmitFPFract64(EmitContext& ctx);
void EmitFPFrexpSig32(EmitContext& ctx);
void EmitFPFrexpSig64(EmitContext& ctx);
void EmitFPFrexpExp32(EmitContext& ctx);
void EmitFPFrexpExp64(EmitContext& ctx);
void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitFPIsInf32(EmitContext& ctx);
void EmitFPIsInf64(EmitContext& ctx);
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
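The new x64 backend emitters above write through an explicit destination operand list instead of returning an SSA Id the way the SPIR-V-style declarations do. A hypothetical caller sketch (not taken from this commit), assuming ctx.Def() maps an IR instruction to its operands as declared later in this diff and that the Arg accessors behave as in the IR used elsewhere in the recompiler:

    // Hypothetical lowering of a 32-bit FP add for some IR::Inst* inst;
    // the argument lookups are illustrative assumptions only.
    const Operands& dest = ctx.Def(inst);
    const Operands& lhs = ctx.Def(inst->Arg(0).InstRecursive());
    const Operands& rhs = ctx.Def(inst->Arg(1).InstRecursive());
    EmitFPAdd32(ctx, dest, lhs, rhs);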
@ -48,6 +48,16 @@ Xmm& EmitContext::TempXmmReg(bool reserve) {
    return reg;
}

void EmitContext::PopTempGPReg() {
    ASSERT(temp_gp_reg_index > 0);
    temp_gp_reg_index--;
}

void EmitContext::PopTempXmmReg() {
    ASSERT(temp_xmm_reg_index > 0);
    temp_xmm_reg_index--;
}

const Operands& EmitContext::Def(IR::Inst* inst) {
    return inst_to_operands.at(inst);
}
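The Pop helpers pair with TempGPReg()/TempXmmReg() to treat the scratch registers as a small stack. A minimal usage sketch, assuming the default reserve = true argument advances the same index that the Pop call decrements:

    // Hypothetical pattern, mirroring how the inline converters below use it.
    Reg tmp0 = ctx.TempGPReg();   // reserve a scratch GPR
    Reg tmp1 = ctx.TempGPReg();   // reserve another
    // ... emit instructions that use tmp0 / tmp1 ...
    ctx.PopTempGPReg();           // release in LIFO order
    ctx.PopTempGPReg();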
@ -40,7 +40,9 @@ public:

    [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
    [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);

    void PopTempGPReg();
    void PopTempXmmReg();

    [[nodiscard]] const Xbyak::Reg64& UserData() const { return Xbyak::util::r11; }

    [[nodiscard]] const Operands& Def(IR::Inst* inst);
@ -159,6 +159,9 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) {

void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
    CodeGenerator& c = ctx.Code();
    if (src == dst) {
        return;
    }
    if (src.isMEM() && dst.isMEM()) {
        Reg tmp = ctx.TempGPReg(false).cvt32();
        c.mov(tmp, src);
@ -176,6 +179,9 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&

void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
    CodeGenerator& c = ctx.Code();
    if (src == dst) {
        return;
    }
    if (src.isMEM() && dst.isMEM()) {
        const Reg64& tmp = ctx.TempGPReg(false);
        c.mov(tmp, src);
@ -193,6 +199,9 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand

void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
    CodeGenerator& c = ctx.Code();
    if (src == dst) {
        return;
    }
    Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
    if (src.getBit() == dst.getBit()) {
        c.mov(tmp, src);
@ -288,4 +297,144 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
    }
}
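The Mov* helpers above route memory-to-memory copies through a scratch register because x86 mov cannot take two memory operands. A minimal sketch of that pattern, independent of the elided parts of the hunks:

    // Assuming an Xbyak::CodeGenerator c and a reserved scratch register tmp:
    // stage the value in tmp, then store it to the destination address.
    c.mov(tmp, src_mem);   // load from the source memory operand
    c.mov(dst_mem, tmp);   // store to the destination memory operand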
void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();

    c.movzx(mantissa, src);

    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);

    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);

    // Normalize subnormal number
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);

    // Zero mantissa
    c.L(zero_mantissa);
    c.and_(mantissa, sign);
    c.jmp(done);

    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);

    // Infinity or NaN
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);

    // Normal number
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }

    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
}
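A scalar reference model of the widening performed by the emitted sequence may make the bit manipulation easier to follow. This is a sketch for illustration, not code from the commit; it assumes the standard binary16/binary32 layouts (1/5/10 and 1/8/23 bits, bias 15 vs. 127, hence the +112 rebias):

    #include <cstdint>

    // Half -> float bit pattern, mirroring the jump structure above.
    inline uint32_t F16ToF32Bits(uint16_t h) {
        const uint32_t sign = (static_cast<uint32_t>(h) & 0x8000u) << 16;
        int exponent = (h >> 10) & 0x1F;
        uint32_t mantissa = h & 0x03FFu;
        if (exponent == 0) {
            if (mantissa == 0) {
                // The emitted code ANDs the zero mantissa with the sign,
                // so both +0 and -0 come out as +0 here.
                return 0u;
            }
            // Normalize a subnormal: shift until the implicit bit appears.
            exponent = 1;
            while ((mantissa & 0x400u) == 0) {
                mantissa <<= 1;
                --exponent;
            }
            mantissa &= 0x03FFu;
        } else if (exponent == 0x1F) {
            return sign | 0x7F800000u | (mantissa << 13); // Inf / NaN
        }
        return sign | (static_cast<uint32_t>(exponent + 112) << 23) | (mantissa << 13);
    }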
void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();

    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }

    // Extract sign, exponent, and mantissa
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shl(mantissa, 3);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);

    // Subnormal numbers will be zero
    c.test(exponent, exponent);
    c.jz(zero_exp);

    // Check for overflow and underflow
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);

    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Underflow
    c.L(underflow);
    c.xor_(mantissa, mantissa);
    c.jmp(done);

    // Overflow
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Zero value
    c.L(zero_exp);
    c.and_(mantissa, sign);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }

    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
    ctx.PopTempGPReg();
}
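For the narrowing direction, a scalar sketch of the intended truncating conversion is below. It is illustrative only: it assumes round-toward-zero with subnormal inputs flushed, as the emitted code does, and its handling of edge cases (signed zeros, NaN payloads) may not match the generated assembly bit for bit:

    #include <cstdint>

    // Float -> half bit pattern: truncate the mantissa, flush tiny values,
    // saturate large values (and NaNs) to a signed infinity.
    inline uint16_t F32ToF16Bits(uint32_t f) {
        const uint32_t sign = (f >> 16) & 0x8000u;
        const int exponent = static_cast<int>((f >> 23) & 0xFFu) - 112; // rebias 127 -> 15
        const uint32_t mantissa = f & 0x007FFFFFu;
        if (exponent <= 0) {
            return static_cast<uint16_t>(sign);           // zero, subnormal, or underflow
        }
        if (exponent >= 0x1F) {
            return static_cast<uint16_t>(sign | 0x7C00u); // overflow, Inf, or NaN
        }
        return static_cast<uint16_t>(sign | (static_cast<uint32_t>(exponent) << 10) | (mantissa >> 13));
    }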

} // namespace Shader::Backend::X64
@ -19,5 +19,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);

} // namespace Shader::Backend::X64