Utils, context, convert and ctx get set
This commit is contained in: parent 2584ec2d76, commit 3d971701db
@@ -904,6 +904,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
        src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
        src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
        src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
@@ -3,25 +3,26 @@

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
    const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
    const u32 half = PushData::UdRegsIndex + (index >> 2);
    const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
                                      ctx.push_data_block, ctx.ConstU32(half),
                                      ctx.ConstU32(index & 3))};
    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
    return ud_reg;
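// The x64 path reads user data straight from guest memory: each scalar
// register occupies one 4-byte slot at ctx.UserData() + u32(reg) * 4, which
// is what the shift by 2 below computes.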
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
    const u32 offset = static_cast<u32>(reg) << 2;
    Reg& tmp = ctx.TempGPReg();
    ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
    MovGP(ctx, dest[0], ptr[tmp]);
}

void EmitSetUserData(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
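// Same layout as EmitGetUserData, but here the slot index is only known at
// runtime, so the byte offset (index * 4) is computed with shl before the
// store.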
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
    Reg& tmp = ctx.TempGPReg();
    ctx.Code().mov(tmp, offset[0]);
    ctx.Code().shl(tmp, 2);
    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
    MovGP(ctx, ptr[tmp], value[0]);
}

void EmitGetThreadBitScalarReg(EmitContext& ctx) {
@@ -56,482 +57,145 @@ void EmitGetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

using BufferAlias = EmitContext::BufferAlias;

Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
    const auto& srt_flatbuf = ctx.buffers.back();
    ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
    return ctx.OpLoad(ctx.U32[1], ptr);
}

Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
    const auto& buffer = ctx.buffers[handle];
    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
    const auto [id, pointer_type] = buffer[BufferAlias::U32];
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
    const Id result{ctx.OpLoad(ctx.U32[1], ptr)};

    if (Sirit::ValidId(buffer.size_dwords)) {
        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
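// The constant-buffer base arrives as two 32-bit halves; the emitted code
// rebuilds the 64-bit address as (base[1] << 32) | base[0], applies the
// offset operand, and loads one dword from guest memory.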
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base,
                   const Operands& offset) {
    Reg& tmp = ctx.TempGPReg(false);
    ctx.Code().mov(tmp, base[1]);
    ctx.Code().shl(tmp, 32);
    ctx.Code().or_(tmp, base[0]);
    if (offset[0].isMEM()) {
        ctx.Code().add(tmp, offset[0]);
    } else {
        return result;
        ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]);
    }
    MovGP(ctx, dest[0], ptr[tmp]);
}

Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
    return ctx.OpLoad(
        ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
                                      ctx.push_data_block, ctx.ConstU32(index)));
void EmitReadConstBuffer(EmitContext& ctx) {
    throw NotImplementedException("ReadConstBuffer");
}

static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
    if (IR::IsPosition(attr)) {
        ASSERT(attr == IR::Attribute::Position0);
        const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
        const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
        return ctx.OpLoad(ctx.F32[1],
                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
    }

    if (IR::IsParam(attr)) {
        const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
        const auto param = ctx.input_params.at(param_id).id;
        const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
        return ctx.OpLoad(ctx.F32[1],
                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
    }
    UNREACHABLE();
void EmitReadStepRate(EmitContext& ctx) {
    throw NotImplementedException("ReadStepRate");
}

Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
    if (ctx.info.l_stage == LogicalStage::Geometry) {
        return EmitGetAttributeForGeometry(ctx, attr, comp, index);
    } else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
               ctx.info.l_stage == LogicalStage::TessellationEval) {
        if (IR::IsTessCoord(attr)) {
            const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
            const auto pointer{
                ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
            return ctx.OpLoad(ctx.F32[1], pointer);
        }
        UNREACHABLE();
    }

    if (IR::IsParam(attr)) {
        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
        const auto& param{ctx.input_params.at(index)};
        if (param.buffer_handle >= 0) {
            const auto step_rate = EmitReadStepRate(ctx, param.id.value);
            const auto offset = ctx.OpIAdd(
                ctx.U32[1],
                ctx.OpIMul(
                    ctx.U32[1],
                    ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
                    ctx.ConstU32(param.num_components)),
                ctx.ConstU32(comp));
            return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
        }

        Id result;
        if (param.is_loaded) {
            // Attribute is either default or manually interpolated. The id points to an already
            // loaded vector.
            result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
        } else if (param.num_components > 1) {
            // Attribute is a vector and we need to access a specific component.
            const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
            result = ctx.OpLoad(param.component_type, pointer);
        } else {
            // Attribute is a single float or integer; simply load it.
            result = ctx.OpLoad(param.component_type, param.id);
        }
        if (param.is_integer) {
            result = ctx.OpBitcast(ctx.F32[1], result);
        }
        return result;
    }

    switch (attr) {
    case IR::Attribute::FragCoord: {
        const Id coord = ctx.OpLoad(
            ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
        if (comp == 3) {
            return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord);
        }
        return coord;
    }
    case IR::Attribute::TessellationEvaluationPointU:
        return ctx.OpLoad(ctx.F32[1],
                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
    case IR::Attribute::TessellationEvaluationPointV:
        return ctx.OpLoad(ctx.F32[1],
                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
    default:
        UNREACHABLE_MSG("Read attribute {}", attr);
    }
void EmitGetAttribute(EmitContext& ctx) {
    throw NotImplementedException("GetAttribute");
}

Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
    switch (attr) {
    case IR::Attribute::VertexId:
        return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
    case IR::Attribute::InstanceId:
        return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
    case IR::Attribute::InstanceId0:
        return EmitReadStepRate(ctx, 0);
    case IR::Attribute::InstanceId1:
        return EmitReadStepRate(ctx, 1);
    case IR::Attribute::WorkgroupIndex:
        return ctx.workgroup_index_id;
    case IR::Attribute::WorkgroupId:
        return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
    case IR::Attribute::LocalInvocationId:
        return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id),
                                      comp);
    case IR::Attribute::IsFrontFace:
        return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
                            ctx.u32_zero_value);
    case IR::Attribute::PrimitiveId:
        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
    case IR::Attribute::InvocationId:
        ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
               ctx.info.l_stage == LogicalStage::TessellationControl);
        return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
    case IR::Attribute::PatchVertices:
        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
        return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
    case IR::Attribute::PackedHullInvocationInfo: {
        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
        // [0:8]: patch id within VGT
        // [8:12]: output control point id
        // But 0:8 should be treated as 0 for attribute addressing purposes
        if (ctx.runtime_info.hs_info.IsPassthrough()) {
            // Gcn shader would run with 1 thread, but we need to run a thread for
            // each output control point.
            // If Gcn shader uses this value, we should make sure all threads in the
            // Vulkan shader use 0
            return ctx.ConstU32(0u);
        } else {
            const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
            return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
        }
    }
    default:
        UNREACHABLE_MSG("Read U32 attribute {}", attr);
    }
void EmitGetAttributeU32(EmitContext& ctx) {
    throw NotImplementedException("GetAttributeU32");
}

void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
    if (attr == IR::Attribute::Position1) {
        LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
        return;
    }
    const Id pointer{OutputAttrPointer(ctx, attr, element)};
    const auto component_type{OutputAttrComponentType(ctx, attr)};
    if (component_type.second) {
        ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
    } else {
        ctx.OpStore(pointer, value);
    }
void EmitSetAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetAttribute");
}

Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
                                                    vertex_index, attr_index, comp_index));
void EmitGetTessGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("GetTessGenericAttribute");
}

Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
                                    Id comp_index) {
    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array,
                                                    vertex_index, attr_index, comp_index));
void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) {
    throw NotImplementedException("ReadTcsGenericOuputAttribute");
}

void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
    // Implied vertex index is invocation_id
    const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
    Id pointer =
        ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
                          ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
    ctx.OpStore(pointer, value);
void EmitSetTcsGenericAttribute(EmitContext& ctx) {
    throw NotImplementedException("SetTcsGenericAttribute");
}

Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
    const u32 index{IR::GenericPatchIndex(patch)};
    const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
    const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
                                                                   : ctx.input_f32};
    const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
    return ctx.OpLoad(ctx.F32[1], pointer);
void EmitGetPatch(EmitContext& ctx) {
    throw NotImplementedException("GetPatch");
}

void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
    const Id pointer{[&] {
        if (IR::IsGeneric(patch)) {
            const u32 index{IR::GenericPatchIndex(patch)};
            const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
        }
        switch (patch) {
        case IR::Patch::TessellationLodLeft:
        case IR::Patch::TessellationLodRight:
        case IR::Patch::TessellationLodTop:
        case IR::Patch::TessellationLodBottom: {
            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
            const Id index_id{ctx.ConstU32(index)};
            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
        }
        case IR::Patch::TessellationLodInteriorU:
            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
                                     ctx.u32_zero_value);
        case IR::Patch::TessellationLodInteriorV:
            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
        default:
            UNREACHABLE_MSG("Patch {}", u32(patch));
        }
    }()};
    ctx.OpStore(pointer, value);
void EmitSetPatch(EmitContext& ctx) {
    throw NotImplementedException("SetPatch");
}

template <u32 N>
static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
                                    bool is_float) {
    if (Sirit::ValidId(buffer_size)) {
        // Bounds checking enabled, wrap in a select.
        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
        auto compare_index = index;
        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
        if (N > 1) {
            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
            std::array<Id, N> zero_ids;
            zero_ids.fill(zero_value);
            zero_value = ctx.ConstantComposite(result_type, zero_ids);
        }
        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
    }
    // Bounds checking not enabled, just return the plain value.
    return result;
void EmitLoadBufferU8(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU8");
}

template <u32 N, BufferAlias alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto flags = inst->Flags<IR::BufferInstInfo>();
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
    const auto [id, pointer_type] = spv_buffer[alias];

    boost::container::static_vector<Id, N> ids;
    for (u32 i = 0; i < N; i++) {
        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
        if (!flags.typed) {
            // Untyped loads have bounds checking per-component.
            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
                                                       result_i, alias == BufferAlias::F32));
        } else {
            ids.push_back(result_i);
        }
    }

    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
    if (flags.typed) {
        // Typed loads have single bounds check for the whole load.
        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
                                            alias == BufferAlias::F32);
    }
    return result;
void EmitLoadBufferU16(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU16");
}

Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
void EmitLoadBufferU32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32");
}

Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
void EmitLoadBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x2");
}

Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x3");
}

Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferU32x4");
}

Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32");
}

Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x2");
}

Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x3");
}

Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferF32x4");
}

Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("LoadBufferFormatF32");
}

Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
void EmitStoreBufferU8(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU8");
}

Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    UNREACHABLE_MSG("SPIR-V instruction");
void EmitStoreBufferU16(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU16");
}

template <u32 N>
void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
    if (Sirit::ValidId(buffer_size)) {
        // Bounds checking enabled, wrap in a conditional branch.
        auto compare_index = index;
        if (N > 1) {
            index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
        }
        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
        const Id in_bounds_label = ctx.OpLabel();
        const Id merge_label = ctx.OpLabel();
        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
        ctx.AddLabel(in_bounds_label);
        emit_func();
        ctx.OpBranch(merge_label);
        ctx.AddLabel(merge_label);
        return;
    }
    // Bounds checking not enabled, just perform the store.
    emit_func();
void EmitStoreBufferU32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32");
}

template <u32 N, BufferAlias alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                 Id value) {
    const auto flags = inst->Flags<IR::BufferInstInfo>();
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
    const auto [id, pointer_type] = spv_buffer[alias];

    auto store = [&] {
        for (u32 i = 0; i < N; i++) {
            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
            if (!flags.typed) {
                // Untyped stores have bounds checking per-component.
                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
            } else {
                store_i();
            }
        }
    };

    if (flags.typed) {
        // Typed stores have single bounds check for the whole store.
        EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
    } else {
        store();
    }
void EmitStoreBufferU32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x2");
}

void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
    const Id result{ctx.OpUConvert(ctx.U8, value)};
    EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
void EmitStoreBufferU32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x3");
}

void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
    const auto& spv_buffer = ctx.buffers[handle];
    if (Sirit::ValidId(spv_buffer.offset)) {
        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
    }
    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
    const Id result{ctx.OpUConvert(ctx.U16, value)};
    EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
                                  [&] { ctx.OpStore(ptr, result); });
void EmitStoreBufferU32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferU32x4");
}

void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32");
}

void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x2(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x2");
}

void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x3");
}

void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x4(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x4");
}

void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
}

void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
}

void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
}

void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
}

void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    UNREACHABLE_MSG("SPIR-V instruction");
void EmitStoreBufferFormatF32(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferFormatF32");
}

} // namespace Shader::Backend::X64
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp (new file, 455 lines)
@@ -0,0 +1,455 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"

namespace Shader::Backend::X64 {

using namespace Xbyak;
using namespace Xbyak::util;

namespace {

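// Software f16 -> f32 using integer ops only (no F16C dependency): unpack the
// half into sign/exponent/mantissa, renormalize subnormals, pass infinity and
// NaN through, and rebias the exponent by 127 - 15 = 112.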
static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();

    c.movzx(mantissa, src);

    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);

    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);

    // Normalize subnormal number
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);

    // Zero mantissa
    c.L(zero_mantissa);
    c.and_(mantissa, sign);
    c.jmp(done);

    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);

    // Infinity or NaN
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);

    // Normal number
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }
}

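// Software f32 -> f16 by truncation: subnormal inputs and underflowing
// results flush to zero, the exponent is rebiased by -112, overflow clamps to
// infinity (0x7C00), and the discarded mantissa bits are not rounded.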
static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();

    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }

    // Extract sign, exponent, and mantissa
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shl(mantissa, 3);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);

    // Subnormal numbers will be zero
    c.test(exponent, exponent);
    c.jz(zero_exp);

    // Check for overflow and underflow
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);

    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Underflow
    c.L(underflow);
    c.xor_(mantissa, mantissa);
    c.jmp(done);

    // Overflow
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);

    // Zero value
    c.L(zero_exp);
    c.and_(mantissa, sign);

    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }
}

} // namespace

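// Half-precision sources are first widened to f32 with the inline helper,
// truncated to an integer with cvttss2si, then masked to the low 16 bits.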
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    ctx.Code().and_(tmp_reg, 0xFFFF);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_reg.cvt16());
    }
}

void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    ctx.Code().cvttss2si(tmp, src[0]);
    ctx.Code().and_(tmp, 0xFFFF);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp.cvt16());
    }
}

void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    ctx.Code().cvttsd2si(tmp, src[0]);
    ctx.Code().and_(tmp, 0xFFFF);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp.cvt16());
    }
}

void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_reg);
    }
}

void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    ctx.Code().cvttss2si(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    ctx.Code().cvttsd2si(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg();
    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_reg);
    }
}

void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
    ctx.Code().cvttss2si(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
    ctx.Code().cvttsd2si(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

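// The unsigned conversions reuse the signed paths: cvttss2si/cvttsd2si
// truncate toward zero either way, which agrees for values representable in
// the destination's unsigned range.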
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F16(ctx, dest, src);
}

void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F32(ctx, dest, src);
}

void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS16F64(ctx, dest, src);
}

void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F16(ctx, dest, src);
}

void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F32(ctx, dest, src);
}

void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS32F64(ctx, dest, src);
}

void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F16(ctx, dest, src);
}

void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F32(ctx, dest, src);
}

void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertS64F64(ctx, dest, src);
}

void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF32ToF16(ctx, dest[0], src[0]);
}

void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitInlineF16ToF32(ctx, dest[0], src[0]);
}

void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsd2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtss2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

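// Integer -> f16 runs the pipeline in reverse: 8/16-bit sources are first
// sign-extended with movsx, converted with cvtsi2ss, then narrowed by the
// inline f32 -> f16 helper.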
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
}

void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
}

void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = ctx.TempXmmReg(false);
    ctx.Code().cvtsi2ss(tmp, src[0]);
    EmitInlineF32ToF16(ctx, dest[0], tmp);
}

void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = ctx.TempXmmReg(false);
    ctx.Code().cvtsi2ss(tmp, src[0]);
    EmitInlineF32ToF16(ctx, dest[0], tmp);
}

void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S8(ctx, dest, src);
}

void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S16(ctx, dest, src);
}

void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S32(ctx, dest, src);
}

void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF16S64(ctx, dest, src);
}

void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_xmm);
    }
}

void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_xmm);
    }
}

void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S8(ctx, dest, src);
}

void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S16(ctx, dest, src);
}

void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S32(ctx, dest, src);
}

void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF32S64(ctx, dest, src);
}

void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_xmm);
    }
}

void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp_xmm);
    }
}

void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        ctx.Code().mov(dest[0], tmp);
    }
}

void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S8(ctx, dest, src);
}

void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S16(ctx, dest, src);
}

void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S32(ctx, dest, src);
}

void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    EmitConvertF64S64(ctx, dest, src);
}

void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    MovGP(ctx, dest[0], src[0]);
}

} // namespace Shader::Backend::X64
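
For reference, a standalone C++ sketch of the half-to-float bit manipulation
that EmitInlineF16ToF32 encodes (function and variable names here are
illustrative, not part of the commit):

#include <cstdint>

static uint32_t F16ToF32Bits(uint16_t h) {
    uint32_t sign = (uint32_t(h) & 0x8000u) << 16;    // sign moves to bit 31
    int exponent = (h >> 10) & 0x1F;                  // 5-bit biased exponent
    uint32_t mantissa = h & 0x03FFu;                  // 10-bit mantissa
    if (exponent == 0) {
        if (mantissa == 0) {
            return sign;                              // signed zero
        }
        exponent = 1;                                 // renormalize a subnormal
        while ((mantissa & 0x400u) == 0) {
            mantissa <<= 1;
            --exponent;
        }
        mantissa &= 0x03FFu;                          // drop the implicit bit
    } else if (exponent == 0x1F) {
        return sign | 0x7F800000u | (mantissa << 13); // infinity / NaN
    }
    // Rebias the exponent from 15 to 127 (add 112) and widen the mantissa
    // from 10 to 23 bits.
    return sign | (uint32_t(exponent + 112) << 23) | (mantissa << 13);
}
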
@@ -52,8 +52,8 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data);
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);
@@ -63,30 +63,30 @@ void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
void EmitReadConstBuffer(EmitContext& ctx);
void EmitLoadBufferU8(EmitContext& ctx);
void EmitLoadBufferU16(EmitContext& ctx);
void EmitLoadBufferU32(EmitContext& ctx);
void EmitLoadBufferU32x2(EmitContext& ctx);
void EmitLoadBufferU32x3(EmitContext& ctx);
void EmitLoadBufferU32x4(EmitContext& ctx);
void EmitLoadBufferF32(EmitContext& ctx);
void EmitLoadBufferF32x2(EmitContext& ctx);
void EmitLoadBufferF32x3(EmitContext& ctx);
void EmitLoadBufferF32x4(EmitContext& ctx);
void EmitLoadBufferFormatF32(EmitContext& ctx);
void EmitStoreBufferU8(EmitContext& ctx);
void EmitStoreBufferU16(EmitContext& ctx);
void EmitStoreBufferU32(EmitContext& ctx);
void EmitStoreBufferU32x2(EmitContext& ctx);
void EmitStoreBufferU32x3(EmitContext& ctx);
void EmitStoreBufferU32x4(EmitContext& ctx);
void EmitStoreBufferF32(EmitContext& ctx);
void EmitStoreBufferF32x2(EmitContext& ctx);
void EmitStoreBufferF32x3(EmitContext& ctx);
void EmitStoreBufferF32x4(EmitContext& ctx);
void EmitStoreBufferFormatF32(EmitContext& ctx);
void EmitBufferAtomicIAdd32(EmitContext& ctx);
void EmitBufferAtomicSMin32(EmitContext& ctx);
void EmitBufferAtomicUMin32(EmitContext& ctx);
@@ -386,56 +386,56 @@ Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
Id EmitLogicalNot(EmitContext& ctx, Id value);
Id EmitConvertS16F16(EmitContext& ctx, Id value);
Id EmitConvertS16F32(EmitContext& ctx, Id value);
Id EmitConvertS16F64(EmitContext& ctx, Id value);
Id EmitConvertS32F16(EmitContext& ctx, Id value);
Id EmitConvertS32F32(EmitContext& ctx, Id value);
Id EmitConvertS32F64(EmitContext& ctx, Id value);
Id EmitConvertS64F16(EmitContext& ctx, Id value);
Id EmitConvertS64F32(EmitContext& ctx, Id value);
Id EmitConvertS64F64(EmitContext& ctx, Id value);
Id EmitConvertU16F16(EmitContext& ctx, Id value);
Id EmitConvertU16F32(EmitContext& ctx, Id value);
Id EmitConvertU16F64(EmitContext& ctx, Id value);
Id EmitConvertU32F16(EmitContext& ctx, Id value);
Id EmitConvertU32F32(EmitContext& ctx, Id value);
Id EmitConvertU32F64(EmitContext& ctx, Id value);
Id EmitConvertU64F16(EmitContext& ctx, Id value);
Id EmitConvertU64F32(EmitContext& ctx, Id value);
Id EmitConvertU64F64(EmitContext& ctx, Id value);
Id EmitConvertU64U32(EmitContext& ctx, Id value);
Id EmitConvertU32U64(EmitContext& ctx, Id value);
Id EmitConvertF16F32(EmitContext& ctx, Id value);
Id EmitConvertF32F16(EmitContext& ctx, Id value);
Id EmitConvertF32F64(EmitContext& ctx, Id value);
Id EmitConvertF64F32(EmitContext& ctx, Id value);
Id EmitConvertF16S8(EmitContext& ctx, Id value);
Id EmitConvertF16S16(EmitContext& ctx, Id value);
Id EmitConvertF16S32(EmitContext& ctx, Id value);
Id EmitConvertF16S64(EmitContext& ctx, Id value);
Id EmitConvertF16U8(EmitContext& ctx, Id value);
Id EmitConvertF16U16(EmitContext& ctx, Id value);
Id EmitConvertF16U32(EmitContext& ctx, Id value);
Id EmitConvertF16U64(EmitContext& ctx, Id value);
Id EmitConvertF32S8(EmitContext& ctx, Id value);
Id EmitConvertF32S16(EmitContext& ctx, Id value);
Id EmitConvertF32S32(EmitContext& ctx, Id value);
Id EmitConvertF32S64(EmitContext& ctx, Id value);
Id EmitConvertF32U8(EmitContext& ctx, Id value);
Id EmitConvertF32U16(EmitContext& ctx, Id value);
Id EmitConvertF32U32(EmitContext& ctx, Id value);
Id EmitConvertF32U64(EmitContext& ctx, Id value);
Id EmitConvertF64S8(EmitContext& ctx, Id value);
Id EmitConvertF64S16(EmitContext& ctx, Id value);
Id EmitConvertF64S32(EmitContext& ctx, Id value);
Id EmitConvertF64S64(EmitContext& ctx, Id value);
Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src);

void EmitImageSampleRaw(EmitContext& ctx);
void EmitImageSampleImplicitLod(EmitContext& ctx);
@@ -41,6 +41,8 @@ public:
    [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
    [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);

    [[nodiscard]] const Xbyak::Reg64& UserData() const { return Xbyak::util::r11; }
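    // r11 is dedicated to the guest user-data base; the context/convert
    // emitters address scalar-register slots relative to it.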

    [[nodiscard]] const Operands& Def(IR::Inst* inst);
    [[nodiscard]] Operands Def(const IR::Value& value);
    [[nodiscard]] std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
@@ -193,12 +193,18 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand

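// MovGP now handles width-mismatched operands: equal widths use a plain mov,
// a narrower source is widened with movzx (zero-extension), and a wider
// source is narrowed by re-typing it to the destination width; only
// mem-to-mem moves still need the temporary-register round-trip.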
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
    CodeGenerator& c = ctx.Code();
    if (src.isMEM() && dst.isMEM()) {
        const Reg64& tmp = ctx.TempGPReg(false);
    Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit())
                                           : dst.getReg();
    if (src.getBit() == dst.getBit()) {
        c.mov(tmp, src);
        c.mov(dst, tmp);
    } else if (src.getBit() < dst.getBit()) {
        c.movzx(tmp, src);
    } else {
        c.mov(dst, src);
        Operand src_tmp = src;
        src_tmp.setBit(dst.getBit());
        c.mov(tmp, src_tmp);
    }
    if (src.isMEM() && dst.isMEM()) {
        c.mov(dst, tmp);
    }
}