Utils, context, convert and ctx get set

This commit is contained in:
Lander Gallastegi 2025-04-05 02:59:01 +02:00 committed by Lander Gallastegi
parent 2584ec2d76
commit 3d971701db
6 changed files with 629 additions and 501 deletions

View File

@ -904,6 +904,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp

View File

@ -3,25 +3,26 @@
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
const u32 half = PushData::UdRegsIndex + (index >> 2);
const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.push_data_block, ctx.ConstU32(half),
ctx.ConstU32(index & 3))};
const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
return ud_reg;
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
const u32 offset = static_cast<u32>(reg) << 2;
Reg& tmp = ctx.TempGPReg();
ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
MovGP( ctx, dest[0], ptr[tmp]);
}
void EmitSetUserData(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
Reg& tmp = ctx.TempGPReg();
ctx.Code().mov(tmp, offset[0]);
ctx.Code().shl(tmp, 2);
ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
MovGP(ctx, ptr[tmp], value[0]);
}
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
@ -56,482 +57,145 @@ void EmitGetGotoVariable(EmitContext&) {
UNREACHABLE_MSG("Unreachable instruction");
}
using BufferAlias = EmitContext::BufferAlias;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
const auto& srt_flatbuf = ctx.buffers.back();
ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
return ctx.OpLoad(ctx.U32[1], ptr);
}
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[BufferAlias::U32];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
if (Sirit::ValidId(buffer.size_dwords)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
Reg& tmp = ctx.TempGPReg(false);
ctx.Code().mov(tmp, base[1]);
ctx.Code().shl(tmp, 32);
ctx.Code().or_(tmp, base[0]);
if (offset[0].isMEM()) {
ctx.Code().add(tmp, offset[0]);
} else {
return result;
ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]);
}
MovGP(ctx, dest[0], ptr[tmp]);
}
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.push_data_block, ctx.ConstU32(index)));
void EmitReadConstBuffer(EmitContext& ctx) {
throw NotImplementedException("ReadConstBuffer");
}
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
void EmitReadStepRate(EmitContext& ctx) {
throw NotImplementedException("ReadStepRate");
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (ctx.info.l_stage == LogicalStage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
}
UNREACHABLE();
}
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (param.buffer_handle >= 0) {
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
}
Id result;
if (param.is_loaded) {
// Attribute is either default or manually interpolated. The id points to an already
// loaded vector.
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
} else if (param.num_components > 1) {
// Attribute is a vector and we need to access a specific component.
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
result = ctx.OpLoad(param.component_type, pointer);
} else {
// Attribute is a single float or interger, simply load it.
result = ctx.OpLoad(param.component_type, param.id);
}
if (param.is_integer) {
result = ctx.OpBitcast(ctx.F32[1], result);
}
return result;
}
switch (attr) {
case IR::Attribute::FragCoord: {
const Id coord = ctx.OpLoad(
ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
if (comp == 3) {
return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord);
}
return coord;
}
case IR::Attribute::TessellationEvaluationPointU:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
case IR::Attribute::TessellationEvaluationPointV:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
default:
UNREACHABLE_MSG("Read attribute {}", attr);
}
void EmitGetAttribute(EmitContext& ctx) {
throw NotImplementedException("GetAttribute");
}
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
switch (attr) {
case IR::Attribute::VertexId:
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
case IR::Attribute::InstanceId:
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
case IR::Attribute::InstanceId0:
return EmitReadStepRate(ctx, 0);
case IR::Attribute::InstanceId1:
return EmitReadStepRate(ctx, 1);
case IR::Attribute::WorkgroupIndex:
return ctx.workgroup_index_id;
case IR::Attribute::WorkgroupId:
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
case IR::Attribute::LocalInvocationId:
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id),
comp);
case IR::Attribute::IsFrontFace:
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InvocationId:
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
case IR::Attribute::PatchVertices:
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
case IR::Attribute::PackedHullInvocationInfo: {
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
// [0:8]: patch id within VGT
// [8:12]: output control point id
// But 0:8 should be treated as 0 for attribute addressing purposes
if (ctx.runtime_info.hs_info.IsPassthrough()) {
// Gcn shader would run with 1 thread, but we need to run a thread for
// each output control point.
// If Gcn shader uses this value, we should make sure all threads in the
// Vulkan shader use 0
return ctx.ConstU32(0u);
} else {
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
}
}
default:
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}
void EmitGetAttributeU32(EmitContext& ctx) {
throw NotImplementedException("GetAttributeU32");
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Position1) {
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
const auto component_type{OutputAttrComponentType(ctx, attr)};
if (component_type.second) {
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
} else {
ctx.OpStore(pointer, value);
}
void EmitSetAttribute(EmitContext& ctx) {
throw NotImplementedException("SetAttribute");
}
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
vertex_index, attr_index, comp_index));
void EmitGetTessGenericAttribute(EmitContext& ctx) {
throw NotImplementedException("GetTessGenericAttribute");
}
Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
Id comp_index) {
const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array,
vertex_index, attr_index, comp_index));
void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) {
throw NotImplementedException("ReadTcsGenericOuputAttribute");
}
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
// Implied vertex index is invocation_id
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
Id pointer =
ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
ctx.OpStore(pointer, value);
void EmitSetTcsGenericAttribute(EmitContext& ctx) {
throw NotImplementedException("SetTcsGenericAttribute");
}
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
: ctx.input_f32};
const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
return ctx.OpLoad(ctx.F32[1], pointer);
void EmitGetPatch(EmitContext& ctx) {
throw NotImplementedException("GetPatch");
}
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
const Id pointer{[&] {
if (IR::IsGeneric(patch)) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
}
switch (patch) {
case IR::Patch::TessellationLodLeft:
case IR::Patch::TessellationLodRight:
case IR::Patch::TessellationLodTop:
case IR::Patch::TessellationLodBottom: {
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
const Id index_id{ctx.ConstU32(index)};
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
}
case IR::Patch::TessellationLodInteriorU:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
ctx.u32_zero_value);
case IR::Patch::TessellationLodInteriorV:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
default:
UNREACHABLE_MSG("Patch {}", u32(patch));
}
}()};
ctx.OpStore(pointer, value);
void EmitSetPatch(EmitContext& ctx) {
throw NotImplementedException("SetPatch");
}
template <u32 N>
static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
bool is_float) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a select.
const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
auto compare_index = index;
auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
if (N > 1) {
compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
std::array<Id, N> zero_ids;
zero_ids.fill(zero_value);
zero_value = ctx.ConstantComposite(result_type, zero_ids);
}
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
return ctx.OpSelect(result_type, in_bounds, result, zero_value);
}
// Bounds checking not enabled, just return the plain value.
return result;
void EmitLoadBufferU8(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU8");
}
template <u32 N, BufferAlias alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
boost::container::static_vector<Id, N> ids;
for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
if (!flags.typed) {
// Untyped loads have bounds checking per-component.
ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
result_i, alias == BufferAlias::F32));
} else {
ids.push_back(result_i);
}
}
const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
if (flags.typed) {
// Typed loads have single bounds check for the whole load.
return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
alias == BufferAlias::F32);
}
return result;
void EmitLoadBufferU16(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU16");
}
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
void EmitLoadBufferU32(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU32");
}
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
void EmitLoadBufferU32x2(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU32x2");
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferU32x3(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU32x3");
}
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferU32x4(EmitContext& ctx) {
throw NotImplementedException("LoadBufferU32x4");
}
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferF32(EmitContext& ctx) {
throw NotImplementedException("LoadBufferF32");
}
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
void EmitLoadBufferF32x2(EmitContext& ctx) {
throw NotImplementedException("LoadBufferF32x2");
}
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferF32x3(EmitContext& ctx) {
throw NotImplementedException("LoadBufferF32x3");
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferF32x4(EmitContext& ctx) {
throw NotImplementedException("LoadBufferF32x4");
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
void EmitLoadBufferFormatF32(EmitContext& ctx) {
throw NotImplementedException("LoadBufferFormatF32");
}
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
void EmitStoreBufferU8(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU8");
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE_MSG("SPIR-V instruction");
void EmitStoreBufferU16(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU16");
}
template <u32 N>
void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
if (Sirit::ValidId(buffer_size)) {
// Bounds checking enabled, wrap in a conditional branch.
auto compare_index = index;
if (N > 1) {
index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
}
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
const Id in_bounds_label = ctx.OpLabel();
const Id merge_label = ctx.OpLabel();
ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
ctx.AddLabel(in_bounds_label);
emit_func();
ctx.OpBranch(merge_label);
ctx.AddLabel(merge_label);
return;
}
// Bounds checking not enabled, just perform the store.
emit_func();
void EmitStoreBufferU32(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU32");
}
template <u32 N, BufferAlias alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
auto store = [&] {
for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
if (!flags.typed) {
// Untyped stores have bounds checking per-component.
EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
} else {
store_i();
}
}
};
if (flags.typed) {
// Typed stores have single bounds check for the whole store.
EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
} else {
store();
}
void EmitStoreBufferU32x2(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU32x2");
}
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U8, value)};
EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
void EmitStoreBufferU32x3(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU32x3");
}
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U16, value)};
EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
[&] { ctx.OpStore(ptr, result); });
void EmitStoreBufferU32x4(EmitContext& ctx) {
throw NotImplementedException("StoreBufferU32x4");
}
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32(EmitContext& ctx) {
throw NotImplementedException("StoreBufferF32");
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x2(EmitContext& ctx) {
throw NotImplementedException("StoreBufferF32x2");
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x3(EmitContext& ctx) {
throw NotImplementedException("StoreBufferF32x3");
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
void EmitStoreBufferF32x4(EmitContext& ctx) {
throw NotImplementedException("StoreBufferF32x4");
}
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
UNREACHABLE_MSG("SPIR-V instruction");
void EmitStoreBufferFormatF32(EmitContext& ctx) {
throw NotImplementedException("StoreBufferFormatF32");
}
}

View File

@ -0,0 +1,455 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
namespace {
// Expands an IEEE 754 half-precision (binary16) value in `src` to a
// single-precision (binary32) value in `dest`, without relying on F16C
// hardware support. Handles zeros, subnormals, infinities and NaNs.
// `dest` may be a memory operand or an XMM register; `src` is read as a
// 16-bit operand.
static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    Reg mantissa = ctx.TempGPReg().cvt32();
    c.movzx(mantissa, src);
    // Extract sign, exponent, and mantissa
    c.mov(sign, mantissa);
    c.and_(sign, 0x8000);
    c.shl(sign, 16);
    c.mov(exponent, mantissa);
    c.and_(exponent, 0x7C00);
    c.shr(exponent, 10);
    c.and_(mantissa, 0x03FF);
    // Check for zero exponent and mantissa
    c.test(exponent, exponent);
    c.jnz(nonzero_exp);
    c.test(mantissa, mantissa);
    c.jz(zero_mantissa);
    // Normalize subnormal number: shift the mantissa up until the implicit
    // bit (bit 10) appears, decrementing the exponent for each shift.
    c.mov(exponent, 1);
    c.L(norm_loop);
    c.test(mantissa, 0x400);
    c.jnz(norm_done);
    c.shl(mantissa, 1);
    c.dec(exponent);
    c.jmp(norm_loop);
    c.L(norm_done);
    c.and_(mantissa, 0x03FF);
    c.jmp(normal);
    // Zero exponent and mantissa: the value is a signed zero, so the result
    // is just the (already repositioned) sign bit. The previous
    // `and_(mantissa, sign)` always produced +0 here since mantissa == 0.
    c.L(zero_mantissa);
    c.mov(mantissa, sign);
    c.jmp(done);
    // Non-zero exponent
    c.L(nonzero_exp);
    c.cmp(exponent, 0x1F);
    c.jne(normal);
    // Infinity or NaN: all-ones f32 exponent, mantissa widened in place.
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, 0x7F800000);
    c.jmp(done);
    // Normal number: rebias the exponent (127 - 15 = 112) and widen the
    // mantissa from 10 to 23 bits.
    c.L(normal);
    c.add(exponent, 112);
    c.shl(exponent, 23);
    c.shl(mantissa, 13);
    c.or_(mantissa, sign);
    c.or_(mantissa, exponent);
    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.movd(dest.getReg().cvt128(), mantissa);
    }
}
// Narrows an IEEE 754 single-precision (binary32) value in `src` to a
// half-precision (binary16) value in `dest`, without relying on F16C
// hardware support. The mantissa is truncated (no rounding), results too
// small for a normal f16 are flushed to signed zero, and overflow
// saturates to signed infinity.
static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
    CodeGenerator& c = ctx.Code();
    Label zero_exp, underflow, overflow, done;
    Reg sign = ctx.TempGPReg().cvt32();
    Reg exponent = ctx.TempGPReg().cvt32();
    // When `dest` is a register we build the result directly in it.
    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
    if (src.isMEM()) {
        c.mov(mantissa, src);
    } else {
        c.movd(mantissa, src.getReg().cvt128());
    }
    // Extract sign, exponent, and mantissa
    c.mov(exponent, mantissa);
    c.mov(sign, mantissa);
    c.and_(exponent, 0x7F800000);
    c.and_(mantissa, 0x007FFFFF);
    c.shr(exponent, 23);
    c.shl(mantissa, 3);
    c.shr(sign, 16);
    c.and_(sign, 0x8000);
    // Subnormal inputs are flushed to zero
    c.test(exponent, exponent);
    c.jz(zero_exp);
    // Check for overflow and underflow after rebias (127 - 15 = 112)
    c.sub(exponent, 112);
    c.cmp(exponent, 0);
    c.jle(underflow);
    c.cmp(exponent, 0x1F);
    c.jge(overflow);
    // Normal number
    c.shl(exponent, 10);
    c.shr(mantissa, 13);
    c.or_(mantissa, exponent);
    c.or_(mantissa, sign);
    c.jmp(done);
    // Underflow: flush to zero, keeping the sign bit.
    c.L(underflow);
    c.mov(mantissa, sign);
    c.jmp(done);
    // Overflow: saturate to signed infinity.
    c.L(overflow);
    c.mov(mantissa, 0x7C00);
    c.or_(mantissa, sign);
    c.jmp(done);
    // Zero or subnormal input: the result is a signed zero. The previous
    // `and_(mantissa, sign)` made the sign depend on a stray shifted
    // mantissa bit instead of the input's sign.
    c.L(zero_exp);
    c.mov(mantissa, sign);
    c.L(done);
    if (dest.isMEM()) {
        c.mov(dest, mantissa);
    } else {
        c.and_(mantissa, 0xFFFF);
    }
}
}
// Truncates a 16-bit float to a signed 16-bit integer, zero-extended into
// the low 16 bits of the destination.
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Xmm widened = ctx.TempXmmReg(false);
    Reg result = to_memory ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    EmitInlineF16ToF32(ctx, widened, src[0]);
    c.cvttss2si(result, widened);
    c.and_(result, 0xFFFF);
    if (to_memory) {
        c.mov(dest[0], result.cvt16());
    }
}
// Truncates a 32-bit float to a signed 16-bit integer, zero-extended into
// the low 16 bits of the destination.
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    c.cvttss2si(result, src[0]);
    c.and_(result, 0xFFFF);
    if (to_memory) {
        c.mov(dest[0], result.cvt16());
    }
}
// Truncates a 64-bit float to a signed 16-bit integer, zero-extended into
// the low 16 bits of the destination.
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    c.cvttsd2si(result, src[0]);
    c.and_(result, 0xFFFF);
    if (to_memory) {
        c.mov(dest[0], result.cvt16());
    }
}
// Truncates a 16-bit float to a signed 32-bit integer, going through the
// inline f16 -> f32 expansion first.
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Xmm widened = ctx.TempXmmReg(false);
    Reg result = to_memory ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    EmitInlineF16ToF32(ctx, widened, src[0]);
    c.cvttss2si(result, widened);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Truncates a 32-bit float to a signed 32-bit integer.
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    c.cvttss2si(result, src[0]);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Truncates a 64-bit float to a signed 32-bit integer.
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
    c.cvttsd2si(result, src[0]);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Truncates a 16-bit float to a signed 64-bit integer, going through the
// inline f16 -> f32 expansion first.
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Xmm widened = ctx.TempXmmReg(false);
    Reg result = to_memory ? ctx.TempGPReg() : dest[0].getReg();
    EmitInlineF16ToF32(ctx, widened, src[0]);
    c.cvttss2si(result, widened);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Truncates a 32-bit float to a signed 64-bit integer.
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false) : dest[0].getReg();
    c.cvttss2si(result, src[0]);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Truncates a 64-bit float to a signed 64-bit integer.
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    CodeGenerator& c = ctx.Code();
    const bool to_memory = dest[0].isMEM();
    Reg result = to_memory ? ctx.TempGPReg(false) : dest[0].getReg();
    c.cvttsd2si(result, src[0]);
    if (to_memory) {
        c.mov(dest[0], result);
    }
}
// Unsigned float-to-int conversions below reuse the signed emitters.
// For the 16-bit results this is exact: cvttss2si/cvttsd2si produce the
// correct value for inputs up to 65535 and the `and 0xFFFF` keeps the low
// half either way.
// NOTE(review): for the U32/U64 variants the underlying cvtt*2si
// instructions perform *signed* truncation, so inputs at or above the
// signed maximum (>= 2^31 resp. 2^63) yield the integer-indefinite value
// instead of the correct unsigned result — confirm such inputs cannot
// reach these paths.
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS16F16(ctx, dest, src);
}
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS16F32(ctx, dest, src);
}
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS16F64(ctx, dest, src);
}
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS32F16(ctx, dest, src);
}
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS32F32(ctx, dest, src);
}
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS32F64(ctx, dest, src);
}
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS64F16(ctx, dest, src);
}
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS64F32(ctx, dest, src);
}
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitConvertS64F64(ctx, dest, src);
}
// u32 -> u64 widening: moves the source dword into the destination;
// presumably MovGP extends/truncates based on the operand widths —
// TODO(review): confirm it zero-extends here.
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
MovGP(ctx, dest[0], src[0]);
}
// u64 -> u32 truncation: only the low dword of the source is kept.
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
MovGP(ctx, dest[0], src[0]);
}
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
EmitInlineF32ToF16(ctx, dest[0], src[0]);
}
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Delegates to the inline f16 -> f32 widening helper, which also performs
    // the store into dest.
    EmitInlineF16ToF32(ctx, dest[0], src[0]);
}
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // f64 -> f32 narrowing. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsd2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source (xbyak rejects the operand combination at emission time);
        // use the float move helper to store the 32-bit result.
        MovFloat(ctx, dest[0], tmp);
    }
}
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // f32 -> f64 widening. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtss2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source (xbyak rejects the operand combination at emission time);
        // use the double move helper to store the 64-bit result.
        MovDouble(ctx, dest[0], tmp);
    }
}
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s8 -> f16: sign-extend the byte into a 32-bit GP register, convert it to
    // f32, then narrow to f16 via the inline helper (which also writes dest).
    // When dest is a register it doubles as scratch for the widened integer.
    // NOTE(review): TempGPReg() uses the default reserve=true here, unlike the
    // F32*/F64* paths which pass false — presumably so the register survives
    // across EmitInlineF32ToF16's own temp allocations; confirm.
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
}
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s16 -> f16: sign-extend into a 32-bit GP register, convert to f32, then
    // narrow to f16 via the inline helper (which also writes dest).
    // NOTE(review): TempGPReg() uses the default reserve=true here, unlike the
    // F32*/F64* paths which pass false; confirm against EmitInlineF32ToF16.
    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
    Xmm tmp_xmm = ctx.TempXmmReg(false);
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
}
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s32 -> f16: convert the integer to f32 in a scratch XMM, then narrow to
    // f16 via the inline helper (which also writes dest).
    Xmm tmp = ctx.TempXmmReg(false);
    ctx.Code().cvtsi2ss(tmp, src[0]);
    EmitInlineF32ToF16(ctx, dest[0], tmp);
}
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s64 -> f16: convert the integer to f32 in a scratch XMM, then narrow to
    // f16 via the inline helper (which also writes dest).
    Xmm tmp = ctx.TempXmmReg(false);
    ctx.Code().cvtsi2ss(tmp, src[0]);
    EmitInlineF32ToF16(ctx, dest[0], tmp);
}
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    // NOTE(review): reuses movsx (sign-extension) — u8 values >= 0x80 would be
    // treated as negative; confirm sources are masked upstream.
    EmitConvertF16S8(ctx, dest, src);
}
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF16S16(ctx, dest, src);
}
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF16S32(ctx, dest, src);
}
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF16S64(ctx, dest, src);
}
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s8 -> f32: sign-extend the byte into a 32-bit GP register, then convert.
    // The conversion targets the destination XMM directly when possible,
    // otherwise a scratch XMM that is spilled to the destination memory.
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp_xmm)` — plain mov cannot encode an XMM
        // source; use the float move helper to store the 32-bit result.
        MovFloat(ctx, dest[0], tmp_xmm);
    }
}
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s16 -> f32: sign-extend into a 32-bit GP register, then convert. The
    // conversion targets the destination XMM directly when possible, otherwise
    // a scratch XMM that is spilled to the destination memory.
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp_xmm)` — plain mov cannot encode an XMM
        // source; use the float move helper to store the 32-bit result.
        MovFloat(ctx, dest[0], tmp_xmm);
    }
}
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s32 -> f32 conversion. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source; use the float move helper to store the 32-bit result.
        MovFloat(ctx, dest[0], tmp);
    }
}
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s64 -> f32 conversion. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2ss(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source; use the float move helper to store the 32-bit result.
        MovFloat(ctx, dest[0], tmp);
    }
}
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    // NOTE(review): reuses movsx (sign-extension) — u8 values >= 0x80 would be
    // treated as negative; confirm sources are masked upstream.
    EmitConvertF32S8(ctx, dest, src);
}
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF32S16(ctx, dest, src);
}
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF32S32(ctx, dest, src);
}
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF32S64(ctx, dest, src);
}
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s8 -> f64: sign-extend the byte into a 32-bit GP register, then convert.
    // The conversion targets the destination XMM directly when possible,
    // otherwise a scratch XMM that is spilled to the destination memory.
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp_xmm)` — plain mov cannot encode an XMM
        // source; use the double move helper to store the 64-bit result.
        MovDouble(ctx, dest[0], tmp_xmm);
    }
}
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s16 -> f64: sign-extend into a 32-bit GP register, then convert. The
    // conversion targets the destination XMM directly when possible, otherwise
    // a scratch XMM that is spilled to the destination memory.
    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().movsx(tmp_reg, src[0]);
    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp_xmm)` — plain mov cannot encode an XMM
        // source; use the double move helper to store the 64-bit result.
        MovDouble(ctx, dest[0], tmp_xmm);
    }
}
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s32 -> f64 conversion. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source; use the double move helper to store the 64-bit result.
        MovDouble(ctx, dest[0], tmp);
    }
}
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // s64 -> f64 conversion. Converts directly into the destination XMM when
    // possible; otherwise converts into a scratch XMM and spills it.
    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
    ctx.Code().cvtsi2sd(tmp, src[0]);
    if (dest[0].isMEM()) {
        // BUGFIX: was `mov(dest[0], tmp)` — plain mov cannot encode an XMM
        // source; use the double move helper to store the 64-bit result.
        MovDouble(ctx, dest[0], tmp);
    }
}
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    // NOTE(review): reuses movsx (sign-extension) — u8 values >= 0x80 would be
    // treated as negative; confirm sources are masked upstream.
    EmitConvertF64S8(ctx, dest, src);
}
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF64S16(ctx, dest, src);
}
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF64S32(ctx, dest, src);
}
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // Unsigned variant shares the signed conversion implementation.
    EmitConvertF64S64(ctx, dest, src);
}
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // u32 -> u16 move; MovGP truncates the source to the destination's width
    // when the source operand is wider.
    MovGP(ctx, dest[0], src[0]);
}
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
    // u16 -> u32 move; MovGP zero-extends (movzx) when the source operand is
    // narrower than the destination.
    MovGP(ctx, dest[0], src[0]);
}
}

View File

@ -52,8 +52,8 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data);
void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx);
@ -63,30 +63,30 @@ void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
void EmitReadConstBuffer(EmitContext& ctx);
void EmitLoadBufferU8(EmitContext& ctx);
void EmitLoadBufferU16(EmitContext& ctx);
void EmitLoadBufferU32(EmitContext& ctx);
void EmitLoadBufferU32x2(EmitContext& ctx);
void EmitLoadBufferU32x3(EmitContext& ctx);
void EmitLoadBufferU32x4(EmitContext& ctx);
void EmitLoadBufferF32(EmitContext& ctx);
void EmitLoadBufferF32x2(EmitContext& ctx);
void EmitLoadBufferF32x3(EmitContext& ctx);
void EmitLoadBufferF32x4(EmitContext& ctx);
void EmitLoadBufferFormatF32(EmitContext& ctx);
void EmitStoreBufferU8(EmitContext& ctx);
void EmitStoreBufferU16(EmitContext& ctx);
void EmitStoreBufferU32(EmitContext& ctx);
void EmitStoreBufferU32x2(EmitContext& ctx);
void EmitStoreBufferU32x3(EmitContext& ctx);
void EmitStoreBufferU32x4(EmitContext& ctx);
void EmitStoreBufferF32(EmitContext& ctx);
void EmitStoreBufferF32x2(EmitContext& ctx);
void EmitStoreBufferF32x3(EmitContext& ctx);
void EmitStoreBufferF32x4(EmitContext& ctx);
void EmitStoreBufferFormatF32(EmitContext& ctx);
void EmitBufferAtomicIAdd32(EmitContext& ctx);
void EmitBufferAtomicSMin32(EmitContext& ctx);
void EmitBufferAtomicUMin32(EmitContext& ctx);
@ -386,56 +386,56 @@ Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
Id EmitLogicalNot(EmitContext& ctx, Id value);
Id EmitConvertS16F16(EmitContext& ctx, Id value);
Id EmitConvertS16F32(EmitContext& ctx, Id value);
Id EmitConvertS16F64(EmitContext& ctx, Id value);
Id EmitConvertS32F16(EmitContext& ctx, Id value);
Id EmitConvertS32F32(EmitContext& ctx, Id value);
Id EmitConvertS32F64(EmitContext& ctx, Id value);
Id EmitConvertS64F16(EmitContext& ctx, Id value);
Id EmitConvertS64F32(EmitContext& ctx, Id value);
Id EmitConvertS64F64(EmitContext& ctx, Id value);
Id EmitConvertU16F16(EmitContext& ctx, Id value);
Id EmitConvertU16F32(EmitContext& ctx, Id value);
Id EmitConvertU16F64(EmitContext& ctx, Id value);
Id EmitConvertU32F16(EmitContext& ctx, Id value);
Id EmitConvertU32F32(EmitContext& ctx, Id value);
Id EmitConvertU32F64(EmitContext& ctx, Id value);
Id EmitConvertU64F16(EmitContext& ctx, Id value);
Id EmitConvertU64F32(EmitContext& ctx, Id value);
Id EmitConvertU64F64(EmitContext& ctx, Id value);
Id EmitConvertU64U32(EmitContext& ctx, Id value);
Id EmitConvertU32U64(EmitContext& ctx, Id value);
Id EmitConvertF16F32(EmitContext& ctx, Id value);
Id EmitConvertF32F16(EmitContext& ctx, Id value);
Id EmitConvertF32F64(EmitContext& ctx, Id value);
Id EmitConvertF64F32(EmitContext& ctx, Id value);
Id EmitConvertF16S8(EmitContext& ctx, Id value);
Id EmitConvertF16S16(EmitContext& ctx, Id value);
Id EmitConvertF16S32(EmitContext& ctx, Id value);
Id EmitConvertF16S64(EmitContext& ctx, Id value);
Id EmitConvertF16U8(EmitContext& ctx, Id value);
Id EmitConvertF16U16(EmitContext& ctx, Id value);
Id EmitConvertF16U32(EmitContext& ctx, Id value);
Id EmitConvertF16U64(EmitContext& ctx, Id value);
Id EmitConvertF32S8(EmitContext& ctx, Id value);
Id EmitConvertF32S16(EmitContext& ctx, Id value);
Id EmitConvertF32S32(EmitContext& ctx, Id value);
Id EmitConvertF32S64(EmitContext& ctx, Id value);
Id EmitConvertF32U8(EmitContext& ctx, Id value);
Id EmitConvertF32U16(EmitContext& ctx, Id value);
Id EmitConvertF32U32(EmitContext& ctx, Id value);
Id EmitConvertF32U64(EmitContext& ctx, Id value);
Id EmitConvertF64S8(EmitContext& ctx, Id value);
Id EmitConvertF64S16(EmitContext& ctx, Id value);
Id EmitConvertF64S32(EmitContext& ctx, Id value);
Id EmitConvertF64S64(EmitContext& ctx, Id value);
Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitConvertU32U16(EmitContext& ctx, Id value);
void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
void EmitImageSampleRaw(EmitContext& ctx);
void EmitImageSampleImplicitLod(EmitContext& ctx);

View File

@ -41,6 +41,8 @@ public:
[[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
[[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
[[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;}
[[nodiscard]] const Operands& Def(IR::Inst* inst);
[[nodiscard]] Operands Def(const IR::Value& value);
[[nodiscard]] std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>

View File

@ -193,12 +193,18 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src.isMEM() && dst.isMEM()) {
const Reg64& tmp = ctx.TempGPReg(false);
Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
if (src.getBit() == dst.getBit()) {
c.mov(tmp, src);
c.mov(dst, tmp);
} else if (src.getBit() < dst.getBit()) {
c.movzx(tmp, src);
} else {
c.mov(dst, src);
Operand src_tmp = src;
src_tmp.setBit(dst.getBit());
c.mov(tmp, src_tmp);
}
if (src.isMEM() && dst.isMEM()) {
c.mov(dst, tmp);
}
}