From 3d971701dbadf27c2517401a29e23078bf4178fa Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sat, 5 Apr 2025 02:59:01 +0200
Subject: [PATCH] Utils, context, convert and ctx get set

---
 CMakeLists.txt                                |   1 +
 .../asm_x64/emit_x64_context_get_set.cpp      | 506 +++---------
 .../backend/asm_x64/emit_x64_convert.cpp      | 455 ++++++++++++++++
 .../backend/asm_x64/emit_x64_instructions.h   | 152 +++---
 .../backend/asm_x64/x64_emit_context.h        |   2 +
 .../backend/asm_x64/x64_utils.cpp             |  14 +-
 6 files changed, 629 insertions(+), 501 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddef52f92..c8596f317 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -904,6 +904,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
         src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+        src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
         src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
index 5486f0179..192570d8f 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
@@ -3,25 +3,26 @@
 
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
 
 namespace Shader::Backend::X64 {
 
 using namespace Xbyak;
 using namespace Xbyak::util;
 
-Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
-    const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
-    const u32 half = PushData::UdRegsIndex + (index >> 2);
-    const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                                      ctx.push_data_block, ctx.ConstU32(half),
-                                      ctx.ConstU32(index & 3))};
-    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
-    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
-    return ud_reg;
+void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
+    const u32 offset = static_cast<u32>(reg) << 2;
+    Reg& tmp = ctx.TempGPReg();
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
+    MovGP(ctx, dest[0], ptr[tmp]);
 }
 
-void EmitSetUserData(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
+void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
+    Reg& tmp = ctx.TempGPReg();
+    ctx.Code().mov(tmp, offset[0]);
+    ctx.Code().shl(tmp, 2);
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
+    MovGP(ctx, ptr[tmp], value[0]);
 }
 
 void EmitGetThreadBitScalarReg(EmitContext& ctx) {
@@ -56,482 +57,145 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
-using BufferAlias = EmitContext::BufferAlias;
-
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
-    const auto& srt_flatbuf = ctx.buffers.back();
-    ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
-    return ctx.OpLoad(ctx.U32[1], ptr);
-}
-
-Id EmitReadConstBuffer(EmitContext& 
ctx, u32 handle, Id index) { - const auto& buffer = ctx.buffers[handle]; - index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); - const auto [id, pointer_type] = buffer[BufferAlias::U32]; - const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; - const Id result{ctx.OpLoad(ctx.U32[1], ptr)}; - - if (Sirit::ValidId(buffer.size_dwords)) { - const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords); - return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value); +void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) { + Reg& tmp = ctx.TempGPReg(false); + ctx.Code().mov(tmp, base[1]); + ctx.Code().shl(tmp, 32); + ctx.Code().or_(tmp, base[0]); + if (offset[0].isMEM()) { + ctx.Code().add(tmp, offset[0]); } else { - return result; + ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]); } + MovGP(ctx, dest[0], ptr[tmp]); } -Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { - const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index}; - return ctx.OpLoad( - ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), - ctx.push_data_block, ctx.ConstU32(index))); +void EmitReadConstBuffer(EmitContext& ctx) { + throw NotImplementedException("ReadConstBuffer"); } -static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { - if (IR::IsPosition(attr)) { - ASSERT(attr == IR::Attribute::Position0); - const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - - if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - UNREACHABLE(); +void EmitReadStepRate(EmitContext& ctx) { + throw NotImplementedException("ReadStepRate"); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { - if (ctx.info.l_stage == LogicalStage::Geometry) { - return EmitGetAttributeForGeometry(ctx, attr, comp, index); - } else if (ctx.info.l_stage == LogicalStage::TessellationControl || - ctx.info.l_stage == LogicalStage::TessellationEval) { - if (IR::IsTessCoord(attr)) { - const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 
0 : 1; - const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - const auto pointer{ - ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; - return ctx.OpLoad(ctx.F32[1], pointer); - } - UNREACHABLE(); - } - - if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& param{ctx.input_params.at(index)}; - if (param.buffer_handle >= 0) { - const auto step_rate = EmitReadStepRate(ctx, param.id.value); - const auto offset = ctx.OpIAdd( - ctx.U32[1], - ctx.OpIMul( - ctx.U32[1], - ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate), - ctx.ConstU32(param.num_components)), - ctx.ConstU32(comp)); - return EmitReadConstBuffer(ctx, param.buffer_handle, offset); - } - - Id result; - if (param.is_loaded) { - // Attribute is either default or manually interpolated. The id points to an already - // loaded vector. - result = ctx.OpCompositeExtract(param.component_type, param.id, comp); - } else if (param.num_components > 1) { - // Attribute is a vector and we need to access a specific component. - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - result = ctx.OpLoad(param.component_type, pointer); - } else { - // Attribute is a single float or interger, simply load it. - result = ctx.OpLoad(param.component_type, param.id); - } - if (param.is_integer) { - result = ctx.OpBitcast(ctx.F32[1], result); - } - return result; - } - - switch (attr) { - case IR::Attribute::FragCoord: { - const Id coord = ctx.OpLoad( - ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); - if (comp == 3) { - return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord); - } - return coord; - } - case IR::Attribute::TessellationEvaluationPointU: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); - case IR::Attribute::TessellationEvaluationPointV: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); - default: - UNREACHABLE_MSG("Read attribute {}", attr); - } +void EmitGetAttribute(EmitContext& ctx) { + throw NotImplementedException("GetAttribute"); } -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { - switch (attr) { - case IR::Attribute::VertexId: - return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); - case IR::Attribute::InstanceId: - return ctx.OpLoad(ctx.U32[1], ctx.instance_id); - case IR::Attribute::InstanceId0: - return EmitReadStepRate(ctx, 0); - case IR::Attribute::InstanceId1: - return EmitReadStepRate(ctx, 1); - case IR::Attribute::WorkgroupIndex: - return ctx.workgroup_index_id; - case IR::Attribute::WorkgroupId: - return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp); - case IR::Attribute::LocalInvocationId: - return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id), - comp); - case IR::Attribute::IsFrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, - ctx.u32_zero_value); - case IR::Attribute::PrimitiveId: - return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); - case IR::Attribute::InvocationId: - ASSERT(ctx.info.l_stage == LogicalStage::Geometry || - ctx.info.l_stage == LogicalStage::TessellationControl); - return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); - case IR::Attribute::PatchVertices: - ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); - return ctx.OpLoad(ctx.U32[1], 
ctx.patch_vertices); - case IR::Attribute::PackedHullInvocationInfo: { - ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); - // [0:8]: patch id within VGT - // [8:12]: output control point id - // But 0:8 should be treated as 0 for attribute addressing purposes - if (ctx.runtime_info.hs_info.IsPassthrough()) { - // Gcn shader would run with 1 thread, but we need to run a thread for - // each output control point. - // If Gcn shader uses this value, we should make sure all threads in the - // Vulkan shader use 0 - return ctx.ConstU32(0u); - } else { - const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); - return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); - } - } - default: - UNREACHABLE_MSG("Read U32 attribute {}", attr); - } +void EmitGetAttributeU32(EmitContext& ctx) { + throw NotImplementedException("GetAttributeU32"); } -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { - if (attr == IR::Attribute::Position1) { - LOG_WARNING(Render_Vulkan, "Ignoring pos1 export"); - return; - } - const Id pointer{OutputAttrPointer(ctx, attr, element)}; - const auto component_type{OutputAttrComponentType(ctx, attr)}; - if (component_type.second) { - ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value)); - } else { - ctx.OpStore(pointer, value); - } +void EmitSetAttribute(EmitContext& ctx) { + throw NotImplementedException("SetAttribute"); } -Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) { - const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array, - vertex_index, attr_index, comp_index)); +void EmitGetTessGenericAttribute(EmitContext& ctx) { + throw NotImplementedException("GetTessGenericAttribute"); } -Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, - Id comp_index) { - const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array, - vertex_index, attr_index, comp_index)); +void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) { + throw NotImplementedException("ReadTcsGenericOuputAttribute"); } -void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { - // Implied vertex index is invocation_id - const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); - Id pointer = - ctx.OpAccessChain(component_ptr, ctx.output_attr_array, - ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index); - ctx.OpStore(pointer, value); +void EmitSetTcsGenericAttribute(EmitContext& ctx) { + throw NotImplementedException("SetTcsGenericAttribute"); } -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { - const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; - const Id type{ctx.l_stage == LogicalStage::TessellationControl ? 
ctx.output_f32
-                                                                    : ctx.input_f32};
-    const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
-    return ctx.OpLoad(ctx.F32[1], pointer);
+void EmitGetPatch(EmitContext& ctx) {
+    throw NotImplementedException("GetPatch");
 }
 
-void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
-    const Id pointer{[&] {
-        if (IR::IsGeneric(patch)) {
-            const u32 index{IR::GenericPatchIndex(patch)};
-            const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
-            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
-        }
-        switch (patch) {
-        case IR::Patch::TessellationLodLeft:
-        case IR::Patch::TessellationLodRight:
-        case IR::Patch::TessellationLodTop:
-        case IR::Patch::TessellationLodBottom: {
-            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
-            const Id index_id{ctx.ConstU32(index)};
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
-        }
-        case IR::Patch::TessellationLodInteriorU:
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
-                                     ctx.u32_zero_value);
-        case IR::Patch::TessellationLodInteriorV:
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
-        default:
-            UNREACHABLE_MSG("Patch {}", u32(patch));
-        }
-    }()};
-    ctx.OpStore(pointer, value);
+void EmitSetPatch(EmitContext& ctx) {
+    throw NotImplementedException("SetPatch");
 }
 
-template <u32 N>
-static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
-                                    bool is_float) {
-    if (Sirit::ValidId(buffer_size)) {
-        // Bounds checking enabled, wrap in a select.
-        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
-        auto compare_index = index;
-        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
-        if (N > 1) {
-            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
-            std::array<Id, N> zero_ids;
-            zero_ids.fill(zero_value);
-            zero_value = ctx.ConstantComposite(result_type, zero_ids);
-        }
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
-        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
-    }
-    // Bounds checking not enabled, just return the plain value.
-    return result;
+void EmitLoadBufferU8(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU8");
 }
 
-template <u32 N, BufferAlias alias>
-static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
-
-    boost::container::static_vector<Id, N> ids;
-    for (u32 i = 0; i < N; i++) {
-        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
-        if (!flags.typed) {
-            // Untyped loads have bounds checking per-component.
-            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
-                                                       result_i, alias == BufferAlias::F32));
-        } else {
-            ids.push_back(result_i);
-        }
-    }
-
-    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
-    if (flags.typed) {
-        // Typed loads have single bounds check for the whole load.
-        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
-                                            alias == BufferAlias::F32);
-    }
-    return result;
+void EmitLoadBufferU16(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU16");
 }
 
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
-    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
+void EmitLoadBufferU32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32");
 }
 
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
-    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
+void EmitLoadBufferU32x2(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x2");
 }
 
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferU32x3(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x3");
 }
 
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferU32x4(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x4");
 }
 
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferF32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32");
 }
 
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x2(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x2");
 }
 
-Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x3(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x3");
 }
 
-Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x4(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x4");
 }
 
-Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferFormatF32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferFormatF32");
 }
 
-Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitStoreBufferU8(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU8");
 }
 
-Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    UNREACHABLE_MSG("SPIR-V instruction");
+void EmitStoreBufferU16(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU16");
 }
 
-template <u32 N>
-void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
-    if (Sirit::ValidId(buffer_size)) {
-        // Bounds checking enabled, wrap in a conditional branch.
-        auto compare_index = index;
-        if (N > 1) {
-            index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
-        }
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
-        const Id in_bounds_label = ctx.OpLabel();
-        const Id merge_label = ctx.OpLabel();
-        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
-        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
-        ctx.AddLabel(in_bounds_label);
-        emit_func();
-        ctx.OpBranch(merge_label);
-        ctx.AddLabel(merge_label);
-        return;
-    }
-    // Bounds checking not enabled, just perform the store.
-    emit_func();
+void EmitStoreBufferU32(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32");
 }
 
-template <u32 N, BufferAlias alias>
-static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                 Id value) {
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
-
-    auto store = [&] {
-        for (u32 i = 0; i < N; i++) {
-            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
-            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
-            if (!flags.typed) {
-                // Untyped stores have bounds checking per-component.
-                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
-            } else {
-                store_i();
-            }
-        }
-    };
-
-    if (flags.typed) {
-        // Typed stores have single bounds check for the whole store.
-        EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
-    } else {
-        store();
-    }
+void EmitStoreBufferU32x2(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x2");
 }
 
-void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    const Id result{ctx.OpUConvert(ctx.U8, value)};
-    EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
+void EmitStoreBufferU32x3(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x3");
 }
 
-void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    const Id result{ctx.OpUConvert(ctx.U16, value)};
-    EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
-                                  [&] { ctx.OpStore(ptr, result); });
+void EmitStoreBufferU32x4(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x4");
 }
 
-void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32");
 }
 
-void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x2(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32x2");
 }
 
-void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x3(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32x3");
 }
 
-void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x4(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32x4");
 }
 
-void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    UNREACHABLE_MSG("SPIR-V instruction");
instruction"); +void EmitStoreBufferFormatF32(EmitContext& ctx) { + throw NotImplementedException("StoreBufferFormatF32"); } } \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp new file mode 100644 index 000000000..52726342e --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -0,0 +1,455 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +namespace { + +static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) { + CodeGenerator& c = ctx.Code(); + Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done; + Reg sign = ctx.TempGPReg().cvt32(); + Reg exponent = ctx.TempGPReg().cvt32(); + Reg mantissa = ctx.TempGPReg().cvt32(); + + c.movzx(mantissa, src); + + // Extract sign, exponent, and mantissa + c.mov(sign, mantissa); + c.and_(sign, 0x8000); + c.shl(sign, 16); + c.mov(exponent, mantissa); + c.and_(exponent, 0x7C00); + c.shr(exponent, 10); + c.and_(mantissa, 0x03FF); + + // Check for zero exponent and mantissa + c.test(exponent, exponent); + c.jnz(nonzero_exp); + c.test(mantissa, mantissa); + c.jz(zero_mantissa); + + // Nromalize subnormal number + c.mov(exponent, 1); + c.L(norm_loop); + c.test(mantissa, 0x400); + c.jnz(norm_done); + c.shl(mantissa, 1); + c.dec(exponent); + c.jmp(norm_loop); + c.L(norm_done); + c.and_(mantissa, 0x03FF); + c.jmp(normal); + + // Zero mantissa + c.L(zero_mantissa); + c.and_(mantissa, sign); + c.jmp(done); + + // Non-zero exponent + c.L(nonzero_exp); + c.cmp(exponent, 0x1F); + c.jne(normal); + + // Infinite or NaN + c.shl(mantissa, 13); + c.or_(mantissa, sign); + c.or_(mantissa, 0x7F800000); + c.jmp(done); + + // Normal number + c.L(normal); + c.add(exponent, 112); + c.shl(exponent, 23); + c.shl(mantissa, 13); + c.or_(mantissa, sign); + c.or_(mantissa, exponent); + + c.L(done); + if (dest.isMEM()) { + c.mov(dest, mantissa); + } else { + c.movd(dest.getReg().cvt128(), mantissa); + } +} + +static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) { + CodeGenerator& c = ctx.Code(); + Label zero_exp, underflow, overflow, done; + Reg sign = ctx.TempGPReg().cvt32(); + Reg exponent = ctx.TempGPReg().cvt32(); + Reg mantissa = dest.isMEM() ? 
+
+    if (src.isMEM()) {
+        c.mov(mantissa, src);
+    } else {
+        c.movd(mantissa, src.getReg().cvt128());
+    }
+
+    // Extract sign, exponent, and mantissa
+    c.mov(exponent, mantissa);
+    c.mov(sign, mantissa);
+    c.and_(exponent, 0x7F800000);
+    c.and_(mantissa, 0x007FFFFF);
+    c.shr(exponent, 23);
+    c.shl(mantissa, 3);
+    c.shr(sign, 16);
+    c.and_(sign, 0x8000);
+
+    // Subnormal numbers will be zero
+    c.test(exponent, exponent);
+    c.jz(zero_exp);
+
+    // Check for overflow and underflow
+    c.sub(exponent, 112);
+    c.cmp(exponent, 0);
+    c.jle(underflow);
+    c.cmp(exponent, 0x1F);
+    c.jge(overflow);
+
+    // Normal number
+    c.shl(exponent, 10);
+    c.shr(mantissa, 13);
+    c.or_(mantissa, exponent);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Underflow
+    c.L(underflow);
+    c.xor_(mantissa, mantissa);
+    c.jmp(done);
+
+    // Overflow
+    c.L(overflow);
+    c.mov(mantissa, 0x7C00);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Zero value: keep only the sign
+    c.L(zero_exp);
+    c.mov(mantissa, sign);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa.cvt16());
+    } else {
+        c.and_(mantissa, 0xFFFF);
+    }
+}
+
+} // Anonymous namespace
+
+void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    ctx.Code().and_(tmp_reg, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg.cvt16());
+    }
+}
+
+void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    ctx.Code().and_(tmp, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp.cvt16());
+    }
+}
+
+void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    ctx.Code().and_(tmp, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp.cvt16());
+    }
+}
+
+void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg);
+    }
+}
+
+void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg);
+    }
+}
+
+void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F16(ctx, dest, src);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F32(ctx, dest, src);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F64(ctx, dest, src);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F16(ctx, dest, src);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F32(ctx, dest, src);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F64(ctx, dest, src);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F16(ctx, dest, src);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F32(ctx, dest, src);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F64(ctx, dest, src);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitInlineF32ToF16(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitInlineF16ToF32(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsd2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movss(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtss2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movsd(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
+}
+
+void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
+}
+
+void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = ctx.TempXmmReg(false);
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    EmitInlineF32ToF16(ctx, dest[0], tmp);
+}
+
+void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = ctx.TempXmmReg(false);
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    EmitInlineF32ToF16(ctx, dest[0], tmp);
+}
+
+void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S8(ctx, dest, src);
+}
+
+void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S16(ctx, dest, src);
+}
+
+void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S32(ctx, dest, src);
+}
+
+void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S64(ctx, dest, src);
+}
+
+void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().movss(dest[0].getAddress(), tmp_xmm);
+    }
+}
+
+void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().movss(dest[0].getAddress(), tmp_xmm);
+    }
+}
+
+void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movss(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movss(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S8(ctx, dest, src);
+}
+
+void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S16(ctx, dest, src);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S32(ctx, dest, src);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S64(ctx, dest, src);
+}
+
+void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().movsd(dest[0].getAddress(), tmp_xmm);
+    }
+}
+
+void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().movsd(dest[0].getAddress(), tmp_xmm);
+    }
+}
+
+void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movsd(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().movsd(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S8(ctx, dest, src);
+}
+
+void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S16(ctx, dest, src);
+}
+
+void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S32(ctx, dest, src);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S64(ctx, dest, src);
+}
+
+void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+} // namespace Shader::Backend::X64
+
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index 6c086553b..48f0facd4 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -52,8 +52,8 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
 void EmitBarrier(EmitContext& ctx);
 void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
 void EmitDeviceMemoryBarrier(EmitContext& ctx);
-Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
-void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data);
+void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
+void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
 void EmitGetThreadBitScalarReg(EmitContext& ctx);
 void EmitSetThreadBitScalarReg(EmitContext& ctx);
 void EmitGetScalarRegister(EmitContext& ctx);
@@ -63,30 +63,30 @@ void EmitSetVectorRegister(EmitContext& ctx);
 void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
-Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset); +void EmitReadConstBuffer(EmitContext& ctx); +void EmitLoadBufferU8(EmitContext& ctx); +void EmitLoadBufferU16(EmitContext& ctx); +void EmitLoadBufferU32(EmitContext& ctx); +void EmitLoadBufferU32x2(EmitContext& ctx); +void EmitLoadBufferU32x3(EmitContext& ctx); +void EmitLoadBufferU32x4(EmitContext& ctx); +void EmitLoadBufferF32(EmitContext& ctx); +void EmitLoadBufferF32x2(EmitContext& ctx); +void EmitLoadBufferF32x3(EmitContext& ctx); +void EmitLoadBufferF32x4(EmitContext& ctx); +void EmitLoadBufferFormatF32(EmitContext& ctx); +void EmitStoreBufferU8(EmitContext& ctx); +void EmitStoreBufferU16(EmitContext& ctx); +void EmitStoreBufferU32(EmitContext& ctx); +void EmitStoreBufferU32x2(EmitContext& ctx); +void EmitStoreBufferU32x3(EmitContext& ctx); +void EmitStoreBufferU32x4(EmitContext& ctx); +void EmitStoreBufferF32(EmitContext& ctx); +void EmitStoreBufferF32x2(EmitContext& ctx); +void EmitStoreBufferF32x3(EmitContext& ctx); +void EmitStoreBufferF32x4(EmitContext& ctx); +void EmitStoreBufferFormatF32(EmitContext& ctx); void EmitBufferAtomicIAdd32(EmitContext& ctx); void EmitBufferAtomicSMin32(EmitContext& ctx); void EmitBufferAtomicUMin32(EmitContext& ctx); @@ -386,56 +386,56 @@ Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); Id EmitLogicalNot(EmitContext& ctx, Id value); -Id EmitConvertS16F16(EmitContext& ctx, Id value); -Id EmitConvertS16F32(EmitContext& ctx, Id value); -Id EmitConvertS16F64(EmitContext& ctx, Id value); -Id EmitConvertS32F16(EmitContext& ctx, Id value); -Id EmitConvertS32F32(EmitContext& ctx, Id value); -Id EmitConvertS32F64(EmitContext& ctx, Id value); -Id EmitConvertS64F16(EmitContext& ctx, Id value); -Id EmitConvertS64F32(EmitContext& ctx, Id value); -Id EmitConvertS64F64(EmitContext& ctx, Id value); -Id EmitConvertU16F16(EmitContext& ctx, Id value); -Id EmitConvertU16F32(EmitContext& ctx, Id value); -Id EmitConvertU16F64(EmitContext& ctx, Id value); -Id EmitConvertU32F16(EmitContext& ctx, Id value); -Id EmitConvertU32F32(EmitContext& ctx, Id value); -Id EmitConvertU32F64(EmitContext& ctx, Id value); -Id EmitConvertU64F16(EmitContext& ctx, Id value); -Id 
EmitConvertU64F32(EmitContext& ctx, Id value); -Id EmitConvertU64F64(EmitContext& ctx, Id value); -Id EmitConvertU64U32(EmitContext& ctx, Id value); -Id EmitConvertU32U64(EmitContext& ctx, Id value); -Id EmitConvertF16F32(EmitContext& ctx, Id value); -Id EmitConvertF32F16(EmitContext& ctx, Id value); -Id EmitConvertF32F64(EmitContext& ctx, Id value); -Id EmitConvertF64F32(EmitContext& ctx, Id value); -Id EmitConvertF16S8(EmitContext& ctx, Id value); -Id EmitConvertF16S16(EmitContext& ctx, Id value); -Id EmitConvertF16S32(EmitContext& ctx, Id value); -Id EmitConvertF16S64(EmitContext& ctx, Id value); -Id EmitConvertF16U8(EmitContext& ctx, Id value); -Id EmitConvertF16U16(EmitContext& ctx, Id value); -Id EmitConvertF16U32(EmitContext& ctx, Id value); -Id EmitConvertF16U64(EmitContext& ctx, Id value); -Id EmitConvertF32S8(EmitContext& ctx, Id value); -Id EmitConvertF32S16(EmitContext& ctx, Id value); -Id EmitConvertF32S32(EmitContext& ctx, Id value); -Id EmitConvertF32S64(EmitContext& ctx, Id value); -Id EmitConvertF32U8(EmitContext& ctx, Id value); -Id EmitConvertF32U16(EmitContext& ctx, Id value); -Id EmitConvertF32U32(EmitContext& ctx, Id value); -Id EmitConvertF32U64(EmitContext& ctx, Id value); -Id EmitConvertF64S8(EmitContext& ctx, Id value); -Id EmitConvertF64S16(EmitContext& ctx, Id value); -Id EmitConvertF64S32(EmitContext& ctx, Id value); -Id EmitConvertF64S64(EmitContext& ctx, Id value); -Id EmitConvertF64U8(EmitContext& ctx, Id value); -Id EmitConvertF64U16(EmitContext& ctx, Id value); -Id EmitConvertF64U32(EmitContext& ctx, Id value); -Id EmitConvertF64U64(EmitContext& ctx, Id value); -Id EmitConvertU16U32(EmitContext& ctx, Id value); -Id EmitConvertU32U16(EmitContext& ctx, Id value); +void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void 
EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitImageSampleRaw(EmitContext& ctx); void EmitImageSampleImplicitLod(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index c967f9295..43aebc26a 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -41,6 +41,8 @@ public: [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); + [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;} + [[nodiscard]] const Operands& Def(IR::Inst* inst); [[nodiscard]] Operands Def(const IR::Value& value); [[nodiscard]] std::optional> diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp index 90375b9d4..7948a41e8 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp @@ -193,12 +193,18 @@ void MovDouble(EmitContext& ctx, const 
Xbyak::Operand& dst, const Xbyak::Operand
 
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
-    if (src.isMEM() && dst.isMEM()) {
-        const Reg64& tmp = ctx.TempGPReg(false);
+    // movzx cannot write to memory and mov cannot take two memory operands, so
+    // stage through a temporary register whenever the destination is in memory.
+    Reg tmp = dst.isMEM() ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
+    if (src.getBit() == dst.getBit()) {
         c.mov(tmp, src);
-        c.mov(dst, tmp);
+    } else if (src.getBit() == 32 && dst.getBit() == 64) {
+        // A 32-bit register write already zero-extends to the full register.
+        c.mov(tmp.cvt32(), src);
+    } else if (src.getBit() < dst.getBit()) {
+        c.movzx(tmp, src);
     } else {
-        c.mov(dst, src);
+        Operand src_tmp = src;
+        src_tmp.setBit(dst.getBit());
+        c.mov(tmp, src_tmp);
+    }
+    if (dst.isMEM()) {
+        c.mov(dst, tmp);
     }
 }
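
Note on the half-precision helpers (editorial addendum, not part of the patch): EmitInlineF16ToF32 open-codes an IEEE 754 binary16 -> binary32 widening, presumably because SSE2 has no scalar conversion and F16C's vcvtph2ps would require a CPUID check. A minimal host-side C++ sketch of the same bit manipulation the emitted assembly performs; the function name is illustrative only:

#include <cstdint>

uint32_t F16ToF32Bits(uint16_t h) {
    const uint32_t sign = (static_cast<uint32_t>(h) & 0x8000u) << 16; // sign -> bit 31
    int exponent = (h >> 10) & 0x1F;                                  // 5-bit biased exponent
    uint32_t mantissa = h & 0x03FFu;                                  // 10-bit mantissa
    if (exponent == 0x1F) {                                           // Inf or NaN
        return sign | 0x7F800000u | (mantissa << 13);
    }
    if (exponent == 0) {
        if (mantissa == 0) {
            return sign;                                              // signed zero
        }
        exponent = 1;                                                 // subnormal: renormalize
        while ((mantissa & 0x400u) == 0) {
            mantissa <<= 1;
            --exponent;
        }
        mantissa &= 0x03FFu;
    }
    // Rebias the exponent (15 -> 127) and widen the mantissa to 23 bits.
    return sign | (static_cast<uint32_t>(exponent + 112) << 23) | (mantissa << 13);
}

The reverse helper, EmitInlineF32ToF16, is the simpler truncating inverse: it drops the 13 low mantissa bits without rounding, maps rebiased exponents outside [1, 30] to signed zero or infinity, and flushes binary32 subnormal inputs to signed zero.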