From 4224a95583b8bf5db7f01d1b73040270ae967f79 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 1 Apr 2025 10:12:23 +0200
Subject: [PATCH] x64 backend core and context

---
 CMakeLists.txt                                 |   8 +-
 .../backend/asm_x64/emit_x64.cpp               | 153 +++++++-
 .../backend/asm_x64/emit_x64_condition.cpp     |   6 +
 .../backend/asm_x64/emit_x64_condition.h       |   8 +
 .../backend/asm_x64/x64_emit_context.cpp       | 353 ++++++++++++++++++
 .../backend/asm_x64/x64_emit_context.h         | 113 ++++++
 .../backend/asm_x64/x64_utils.cpp              | 285 ++++++++++++++
 .../backend/asm_x64/x64_utils.h                |  23 ++
 8 files changed, 947 insertions(+), 2 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_emit_context.h
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_utils.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_utils.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7617d050..a550a7a88 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -899,8 +899,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
 
 if (ARCHITECTURE STREQUAL "x86_64")
     set(SHADER_RECOMPILER ${SHADER_RECOMPILER}
+        src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
+        src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
         src/shader_recompiler/backend/asm_x64/emit_x64.cpp
-        src/shader_recompiler/backend/asm_x64/emit_x64.h)
+        src/shader_recompiler/backend/asm_x64/emit_x64.h
+        src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
+        src/shader_recompiler/backend/asm_x64/x64_emit_context.h
+        src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+        src/shader_recompiler/backend/asm_x64/x64_utils.h)
 endif()
 
 set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
index d1a95bd0a..d7d284cb8 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
@@ -1,12 +1,163 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <type_traits>
+#include <utility>
+#include "common/func_traits.h"
 #include "shader_recompiler/backend/asm_x64/emit_x64.h"
+#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
 
 namespace Shader::Backend::X64 {
 
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+// Emits a flag-setting test of a boolean condition so callers can branch on it.
+static void TestCondition(EmitContext& ctx, const IR::Inst* ref) {
+    IR::Value cond = ref->Arg(0);
+    // Def returns the operand list by value; keep it in a local instead of
+    // binding a reference to a temporary.
+    Operands op = ctx.Def(cond);
+    Reg8 tmp = op[0].isREG() ? op[0].getReg().cvt8() : ctx.TempGPReg(false).cvt8();
+    if (!op[0].isREG()) {
+        ctx.Code().mov(tmp, op[0]);
+    }
+    ctx.Code().test(tmp, tmp);
+}
+
+template <typename ArgType>
+ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+    if constexpr (std::is_same_v<ArgType, Operands>) {
+        return ctx.Def(arg);
+    } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+        return arg;
+    } else if constexpr (std::is_same_v<ArgType, u32>) {
+        return arg.U32();
+    } else if constexpr (std::is_same_v<ArgType, u64>) {
+        return arg.U64();
+    } else if constexpr (std::is_same_v<ArgType, bool>) {
+        return arg.U1();
+    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+        return arg.Attribute();
+    } else if constexpr (std::is_same_v<ArgType, IR::ScalarReg>) {
+        return arg.ScalarReg();
+    } else if constexpr (std::is_same_v<ArgType, IR::VectorReg>) {
+        return arg.VectorReg();
+    } else if constexpr (std::is_same_v<ArgType, const char*>) {
+        return arg.StringLiteral();
+    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+        return arg.Patch();
+    }
+    UNREACHABLE();
+}
+
+template <auto func, bool has_dest, bool is_first_arg_inst, size_t... I>
+static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+    using Traits = Common::FuncTraits<decltype(func)>;
+    if constexpr (has_dest) {
+        if constexpr (is_first_arg_inst) {
+            func(ctx, inst, ctx.Def(inst),
+                 Arg<typename Traits::template ArgType<I + 3>>(ctx, inst->Arg(I))...);
+        } else {
+            func(ctx, ctx.Def(inst),
+                 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+        }
+    } else {
+        if constexpr (is_first_arg_inst) {
+            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+        } else {
+            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+        }
+    }
+}
+
+template <auto func, bool has_dest>
+static void Invoke(EmitContext& ctx, IR::Inst* inst) {
+    using Traits = Common::FuncTraits<decltype(func)>;
+    static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+    if constexpr (Traits::NUM_ARGS == 1) {
+        Invoke<func, false, false>(ctx, inst, std::make_index_sequence<0>{});
+    } else {
+        using FirstArgType = typename Traits::template ArgType<1>;
+        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
+        static constexpr size_t num_inst_args = Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1);
+        if constexpr (num_inst_args > 0 && has_dest) {
+            // One of the counted arguments is the destination operand list.
+            Invoke<func, true, is_first_arg_inst>(ctx, inst,
+                                                  std::make_index_sequence<num_inst_args - 1>{});
+        } else {
+            Invoke<func, false, is_first_arg_inst>(ctx, inst,
+                                                   std::make_index_sequence<num_inst_args>{});
+        }
+    }
+}
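+
+// Illustrative example (not emitted code): for an opcodes.inc entry such as
+// OPCODE(IAdd32, U32, U32, U32, ), the OPCODE macro in EmitInst below expands
+// to roughly:
+//
+//     case IR::Opcode::IAdd32:
+//         Invoke<&EmitIAdd32, true>(ctx, inst);
+//         return;
+//
+// so every opcode dispatches to its Emit* handler, with has_dest deduced from
+// whether the result type in the table is Void.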
+static void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+    switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...)                                                            \
+    case IR::Opcode::name:                                                                         \
+        Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst);                   \
+        return;
+#include "shader_recompiler/ir/opcodes.inc"
+#undef OPCODE
+    }
+    UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
+}
+
+static void Traverse(EmitContext& ctx, const IR::Program& program) {
+    CodeGenerator& c = ctx.Code();
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        ctx.ResetTempRegs();
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::Block: {
+            IR::Block* block = node.data.block;
+            c.L(ctx.BlockLabel(block));
+            for (IR::Inst& inst : *block) {
+                EmitInst(ctx, &inst);
+            }
+            const auto& phi_assignments = ctx.PhiAssignments(block);
+            if (phi_assignments) {
+                for (const auto& [phi, value] : phi_assignments->get()) {
+                    MovValue(ctx, ctx.Def(phi), value);
+                }
+            }
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::If: {
+            IR::Inst* ref = node.data.if_node.cond.InstRecursive();
+            Label& merge = ctx.BlockLabel(node.data.if_node.merge);
+            TestCondition(ctx, ref);
+            c.jz(merge);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Repeat: {
+            IR::Inst* ref = node.data.repeat.cond.InstRecursive();
+            Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
+            TestCondition(ctx, ref);
+            c.jnz(loop_header);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Break: {
+            IR::Inst* ref = node.data.break_node.cond.InstRecursive();
+            Label& merge = ctx.BlockLabel(node.data.break_node.merge);
+            TestCondition(ctx, ref);
+            c.jz(merge);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Return: {
+            c.jmp(ctx.EndLabel());
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Unreachable: {
+            c.int3();
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Loop:
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            break;
+        }
+    }
+}
+
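+// Emission entry point: build the context (which runs linear-scan register
+// allocation over the whole program), reserve the spill area in the prologue,
+// emit every block, bind the label that Return nodes jump to, and restore any
+// preserved registers in the epilogue.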
"shader_recompiler/backend/asm_x64/x64_utils.h" + +using namespace Xbyak; +using namespace Xbyak::util; + +namespace Shader::Backend::X64 { + +EmitContext::EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_) + : program(program_), code(code_) { + for (IR::Block* block : program.blocks) { + block_labels[block] = {}; + } + AllocateRegisters(); +} + +Reg64& EmitContext::TempGPReg(bool reserve) { + ASSERT(temp_gp_reg_index < temp_gp_regs.size()); + u64 idx = temp_gp_reg_index; + if (reserve) { + temp_gp_reg_index++; + } + Reg64& reg = temp_gp_regs[idx]; + if (idx > num_scratch_gp_regs && + std::ranges::find(preserved_regs, reg) == preserved_regs.end()) { + preserved_regs.push_back(reg); + code.push(reg); + } + return reg; +} + +Xmm& EmitContext::TempXmmReg(bool reserve) { + ASSERT(temp_xmm_reg_index < temp_xmm_regs.size()); + u64 idx = temp_xmm_reg_index; + if (reserve) { + temp_xmm_reg_index++; + } + Xmm& reg = temp_xmm_regs[idx]; + if (idx > num_scratch_xmm_regs && + std::ranges::find(preserved_regs, reg) == preserved_regs.end()) { + preserved_regs.push_back(reg); + code.sub(rsp, 16); + code.movdqu(ptr[rsp], reg); + } + return reg; +} + +Operands EmitContext::Def(IR::Inst* inst) { + return inst_to_operands.at(inst); +} + +Operands EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return Def(value.InstRecursive()); + } + Operands operands; + Reg64& tmp = TempGPReg(false); + switch (value.Type()) { + case IR::Type::U1: + operands.push_back(TempGPReg().cvt8()); + code.mov(operands.back(), value.U1()); + break; + case IR::Type::U8: + operands.push_back(TempGPReg().cvt8()); + code.mov(operands.back(), value.U8()); + break; + case IR::Type::U16: + operands.push_back(TempGPReg().cvt16()); + code.mov(operands.back(), value.U16()); + break; + case IR::Type::U32: + operands.push_back(TempGPReg().cvt32()); + code.mov(operands.back(), value.U32()); + break; + case IR::Type::F32: { + code.mov(tmp.cvt32(), std::bit_cast(value.F32())); + Xmm& xmm32 = TempXmmReg(); + code.movd(xmm32, tmp.cvt32()); + operands.push_back(xmm32); + break; + } + case IR::Type::U64: + operands.push_back(TempGPReg()); + code.mov(operands.back(), value.U64()); + break; + case IR::Type::F64: { + code.mov(tmp, std::bit_cast(value.F64())); + Xmm& xmm64 = TempXmmReg(); + code.movq(xmm64, tmp); + operands.push_back(xmm64); + break; + } + case IR::Type::ScalarReg: + operands.push_back(TempGPReg().cvt32()); + code.mov(operands.back(), std::bit_cast(value.ScalarReg())); + break; + case IR::Type::VectorReg: + operands.push_back(TempXmmReg().cvt32()); + code.mov(operands.back(), std::bit_cast(value.VectorReg())); + break; + case IR::Type::Attribute: + operands.push_back(TempGPReg()); + code.mov(operands.back(), std::bit_cast(value.Attribute())); + break; + case IR::Type::Patch: + operands.push_back(TempGPReg()); + code.mov(operands.back(), std::bit_cast(value.Patch())); + break; + default: + UNREACHABLE_MSG("Unsupported value type: %s", IR::NameOf(value.Type())); + break; + } + return operands; +} + +std::optional> +EmitContext::PhiAssignments(IR::Block* block) const { + auto it = phi_assignments.find(block); + if (it != phi_assignments.end()) { + return std::cref(it->second); + } + return std::nullopt; +} + +void EmitContext::ResetTempRegs() { + temp_gp_reg_index = 0; + temp_xmm_reg_index = 0; +} + +void EmitContext::Prologue() { + if (inst_stack_space > 0) { + code.sub(rsp, inst_stack_space); + code.mov(r11, rsp); + } +} + +void EmitContext::Epilogue() { + for (auto it = preserved_regs.rbegin(); 
+void EmitContext::Epilogue() {
+    // Restore preserved registers in reverse order of preservation.
+    for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) {
+        Reg& reg = *it;
+        if (reg.isXMM()) {
+            code.movdqu(reg.cvt128(), ptr[rsp]);
+            code.add(rsp, 16);
+        } else {
+            code.pop(reg);
+        }
+    }
+    preserved_regs.clear();
+    if (inst_stack_space > 0) {
+        code.add(rsp, inst_stack_space);
+    }
+}
+
+void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
+                            ActiveIntervalList& active_intervals) {
+    const auto get_operand = [&](IR::Inst* inst) -> Operand {
+        size_t current_sp;
+        if (ctx.free_stack_slots.empty()) {
+            current_sp = inst_stack_space;
+            inst_stack_space += 8;
+        } else {
+            // Reuse a freed slot; the stored value is its displacement.
+            current_sp = ctx.free_stack_slots.back();
+            ctx.free_stack_slots.pop_back();
+        }
+        switch (GetRegBytesOfType(inst->Type())) {
+        case 1:
+            return byte[r11 + current_sp];
+        case 2:
+            return word[r11 + current_sp];
+        case 4:
+            return dword[r11 + current_sp];
+        case 8:
+            return qword[r11 + current_sp];
+        default:
+            UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst->Type()));
+            return {};
+        }
+    };
+    auto spill_candidate = std::max_element(
+        active_intervals.begin(), active_intervals.end(),
+        [](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; });
+    if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) {
+        inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst);
+        ctx.active_spill_intervals.push_back(interval);
+    } else {
+        Operands& operands = inst_to_operands[spill_candidate->inst];
+        Reg reg = operands[spill_candidate->component].getReg();
+        inst_to_operands[interval.inst][interval.component] =
+            reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst->Type());
+        operands[spill_candidate->component] = get_operand(spill_candidate->inst);
+        ctx.active_spill_intervals.push_back(*spill_candidate);
+        *spill_candidate = interval;
+    }
+}
+
+void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& insts) {
+    IR::Inst* inst = interval.inst;
+    size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
+    interval.start = dist;
+    interval.end = dist;
+    for (const auto& use : inst->Uses()) {
+        if (use.user->GetOpcode() == IR::Opcode::Phi) {
+            // The value is assigned at the end of the phi's predecessor block
+            IR::Inst& last_inst = use.user->PhiBlock(use.operand)->back();
+            dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &last_inst));
+            interval.start = std::min(interval.start, dist);
+            interval.end = std::max(interval.end, dist);
+        } else {
+            dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), use.user));
+            interval.end = std::max(interval.end, dist);
+        }
+    }
+    if (inst->GetOpcode() == IR::Opcode::Phi) {
+        for (size_t i = 0; i < inst->NumArgs(); i++) {
+            IR::Block* block = inst->PhiBlock(i);
+            dist =
+                std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &block->back()));
+            interval.start = std::min(interval.start, dist);
+            interval.end = std::max(interval.end, dist);
+            phi_assignments[block].emplace_back(inst, inst->Arg(i));
+        }
+    }
+}
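+
+// Example (illustrative): given a block containing %2 = IAdd32 %0, %1 followed
+// later by %5 = IMul32 %2, %4, the interval of %2 starts at its defining
+// instruction and ends at its last use (%5). Phi operands extend an interval
+// to the end of the predecessor block where the assignment is materialized.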
+
+// Register utilization:
+// Instruction registers:
+//   General purpose registers: rcx, rdx, rsi, r8, r9, r10
+//   XMM registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
+//
+// Value / temporary registers:
+//   General purpose registers: rax (scratch), rbx, r12, r13, r14, r15
+//   XMM registers: xmm7 (scratch), xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
+//
+// r11: Stack pointer for spilled instructions
+// rdi: User data pointer
+// rsp: Stack pointer
+//
+// Instruction registers that end up unused are handed out as additional
+// temporary registers.
+void EmitContext::AllocateRegisters() {
+    const std::array initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
+    const std::array initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
+    const std::array initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
+    const std::array initial_xmm_temp_regs = {xmm7,  xmm8,  xmm9,  xmm10, xmm11,
+                                              xmm12, xmm13, xmm14, xmm15};
+
+    boost::container::small_vector<InstInterval, 64> intervals;
+    FlatInstList insts;
+    // Copy the instructions to a flat list for faster iteration
+    for (IR::Block* block : program.blocks) {
+        insts.reserve(insts.size() + block->size());
+        for (IR::Inst& inst : *block) {
+            insts.push_back(&inst);
+        }
+    }
+    for (IR::Inst* inst : insts) {
+        if (inst->GetOpcode() == IR::Opcode::ConditionRef || inst->Type() == IR::Type::Void) {
+            continue;
+        }
+        intervals.emplace_back(inst, 0, 0);
+        AdjustInstInterval(intervals.back(), insts);
+    }
+    std::sort(intervals.begin(), intervals.end(),
+              [](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
+    RegAllocContext ctx;
+    // The linear-scan free lists start out holding the instruction registers;
+    // the temporary registers are reserved for TempGPReg/TempXmmReg.
+    ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(),
+                            initial_gp_inst_regs.end());
+    ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(),
+                             initial_xmm_inst_regs.end());
+    boost::container::static_vector<Reg64, NumGPRegs> unused_gp_inst_regs;
+    boost::container::static_vector<Xmm, NumXmmRegs> unused_xmm_inst_regs;
+    unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
+                               ctx.free_gp_regs.end());
+    unused_xmm_inst_regs.insert(unused_xmm_inst_regs.end(), ctx.free_xmm_regs.begin(),
+                                ctx.free_xmm_regs.end());
+    for (const InstInterval& interval : intervals) {
+        // Free resources of expired intervals
+        for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
+            if (it->end <= interval.start) {
+                Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
+                ctx.free_gp_regs.push_back(reg);
+                it = ctx.active_gp_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
+            if (it->end <= interval.start) {
+                Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
+                ctx.free_xmm_regs.push_back(reg);
+                it = ctx.active_xmm_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        for (auto it = ctx.active_spill_intervals.begin();
+             it != ctx.active_spill_intervals.end();) {
+            if (it->end <= interval.start) {
+                const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
+                ctx.free_stack_slots.push_back(addr.getDisp());
+                it = ctx.active_spill_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
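+        // Allocate one register or spill slot per value component; e.g. a
+        // U32x3 result takes three GP allocations.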
+        u8 num_components = GetNumComponentsOfType(interval.inst->Type());
+        bool is_floating = IsFloatingType(interval.inst->Type());
+        // Size the operand list up front so components can be assigned by index.
+        inst_to_operands[interval.inst].resize(num_components);
+        if (is_floating) {
+            for (size_t i = 0; i < num_components; ++i) {
+                ActiveInstInterval active(interval, i);
+                if (!ctx.free_xmm_regs.empty()) {
+                    // Copy, not reference: the register is popped off the free list.
+                    Xmm reg = ctx.free_xmm_regs.back();
+                    ctx.free_xmm_regs.pop_back();
+                    inst_to_operands[active.inst][active.component] = reg;
+                    unused_xmm_inst_regs.erase(
+                        std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
+                        unused_xmm_inst_regs.end());
+                    ctx.active_xmm_intervals.push_back(active);
+                } else {
+                    SpillInst(ctx, active, ctx.active_xmm_intervals);
+                }
+            }
+        } else {
+            for (size_t i = 0; i < num_components; ++i) {
+                ActiveInstInterval active(interval, i);
+                if (!ctx.free_gp_regs.empty()) {
+                    Reg64 reg = ctx.free_gp_regs.back();
+                    ctx.free_gp_regs.pop_back();
+                    inst_to_operands[active.inst][active.component] =
+                        ResizeRegToType(reg, active.inst->Type());
+                    unused_gp_inst_regs.erase(
+                        std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
+                        unused_gp_inst_regs.end());
+                    ctx.active_gp_intervals.push_back(active);
+                } else {
+                    SpillInst(ctx, active, ctx.active_gp_intervals);
+                }
+            }
+        }
+    }
+    // Unused instruction registers become scratch temporaries; the dedicated
+    // temporary registers follow and are preserved on first use.
+    temp_gp_regs.insert(temp_gp_regs.end(), unused_gp_inst_regs.begin(),
+                        unused_gp_inst_regs.end());
+    temp_xmm_regs.insert(temp_xmm_regs.end(), unused_xmm_inst_regs.begin(),
+                         unused_xmm_inst_regs.end());
+    num_scratch_gp_regs = unused_gp_inst_regs.size() + 1;   // rax is also scratch
+    num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is also scratch
+    temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(),
+                        initial_gp_temp_regs.end());
+    temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(),
+                         initial_xmm_temp_regs.end());
+}
+
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
new file mode 100644
index 000000000..59e0f2822
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
@@ -0,0 +1,113 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <functional>
+#include <optional>
+#include <boost/container/flat_map.hpp>
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+#include <xbyak/xbyak.h>
+#include "shader_recompiler/ir/program.h"
+
+namespace Shader::Backend::X64 {
+
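+// One Xbyak operand per component of an IR value (vectors use up to 4).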
+using Operands = boost::container::static_vector<Xbyak::Operand, 4>;
+
+class EmitContext {
+public:
+    static constexpr size_t NumGPRegs = 16;
+    static constexpr size_t NumXmmRegs = 16;
+
+    using PhiAssignmentList = boost::container::small_vector<std::pair<IR::Inst*, IR::Value>, 4>;
+
+    EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_);
+
+    [[nodiscard]] Xbyak::CodeGenerator& Code() const {
+        return code;
+    }
+
+    [[nodiscard]] const IR::Program& Program() const {
+        return program;
+    }
+
+    [[nodiscard]] Xbyak::Label& EndLabel() {
+        return end_label;
+    }
+
+    [[nodiscard]] Xbyak::Label& BlockLabel(IR::Block* block) {
+        return block_labels.at(block);
+    }
+
+    [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
+    [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
+
+    [[nodiscard]] Operands Def(IR::Inst* inst);
+    [[nodiscard]] Operands Def(const IR::Value& value);
+    [[nodiscard]] std::optional<std::reference_wrapper<const PhiAssignmentList>>
+    PhiAssignments(IR::Block* block) const;
+
+    void ResetTempRegs();
+
+    void Prologue();
+    void Epilogue();
+
+private:
+    struct InstInterval {
+        IR::Inst* inst;
+        size_t start;
+        size_t end;
+    };
+
+    struct ActiveInstInterval : InstInterval {
+        size_t component;
+
+        ActiveInstInterval(const InstInterval& interval, size_t component_)
+            : InstInterval(interval), component(component_) {}
+    };
+    using ActiveIntervalList = boost::container::small_vector<ActiveInstInterval, 8>;
+
+    struct RegAllocContext {
+        boost::container::static_vector<Xbyak::Reg64, NumGPRegs> free_gp_regs;
+        boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> free_xmm_regs;
+        boost::container::small_vector<size_t, 8> free_stack_slots;
+        ActiveIntervalList active_gp_intervals;
+        ActiveIntervalList active_xmm_intervals;
+        ActiveIntervalList active_spill_intervals;
+    };
+
+    using FlatInstList = boost::container::small_vector<IR::Inst*, 64>;
+
+    const IR::Program& program;
+    Xbyak::CodeGenerator& code;
+
+    // Map of blocks to their phi assignments
+    boost::container::small_flat_map<IR::Block*, PhiAssignmentList, 8> phi_assignments;
+
+    // Map of instructions to their operands
+    boost::container::small_flat_map<IR::Inst*, Operands, 64> inst_to_operands;
+
+    // Space used for spilled instructions
+    size_t inst_stack_space = 0;
+
+    // Temporary register allocation
+    boost::container::static_vector<Xbyak::Reg64, NumGPRegs> temp_gp_regs;
+    boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> temp_xmm_regs;
+    size_t temp_gp_reg_index = 0;
+    size_t temp_xmm_reg_index = 0;
+    size_t num_scratch_gp_regs = 0;
+    size_t num_scratch_xmm_regs = 0;
+
+    // Preserved registers
+    boost::container::static_vector<Xbyak::Reg, NumGPRegs + NumXmmRegs> preserved_regs;
+
+    // Labels
+    boost::container::small_flat_map<IR::Block*, Xbyak::Label, 8> block_labels;
+    Xbyak::Label end_label;
+
+    void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
+                   ActiveIntervalList& active_intervals);
+    void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts);
+    void AllocateRegisters();
+};
+
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
new file mode 100644
index 000000000..90375b9d4
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -0,0 +1,285 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <bit>
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+namespace Shader::Backend::X64 {
+
+bool IsFloatingType(IR::Type type) {
+    // We store F16 in general purpose registers since we don't do
+    // arithmetic on it
+    return type == IR::Type::F32 || type == IR::Type::F64;
+}
+
+bool IsConditionalOpcode(IR::Opcode opcode) {
+    switch (opcode) {
+    case IR::Opcode::FPOrdEqual32:
+    case IR::Opcode::FPOrdEqual64:
+    case IR::Opcode::FPUnordEqual32:
+    case IR::Opcode::FPUnordEqual64:
+    case IR::Opcode::FPOrdNotEqual32:
+    case IR::Opcode::FPOrdNotEqual64:
+    case IR::Opcode::FPUnordNotEqual32:
+    case IR::Opcode::FPUnordNotEqual64:
+    case IR::Opcode::FPOrdLessThan32:
+    case IR::Opcode::FPOrdLessThan64:
+    case IR::Opcode::FPUnordLessThan32:
+    case IR::Opcode::FPUnordLessThan64:
+    case IR::Opcode::FPOrdGreaterThan32:
+    case IR::Opcode::FPOrdGreaterThan64:
+    case IR::Opcode::FPUnordGreaterThan32:
+    case IR::Opcode::FPUnordGreaterThan64:
+    case IR::Opcode::FPOrdLessThanEqual32:
+    case IR::Opcode::FPOrdLessThanEqual64:
+    case IR::Opcode::FPUnordLessThanEqual32:
+    case IR::Opcode::FPUnordLessThanEqual64:
+    case IR::Opcode::FPOrdGreaterThanEqual32:
+    case IR::Opcode::FPOrdGreaterThanEqual64:
+    case IR::Opcode::FPUnordGreaterThanEqual32:
+    case IR::Opcode::FPUnordGreaterThanEqual64:
+    case IR::Opcode::FPIsNan32:
+    case IR::Opcode::FPIsNan64:
+    case IR::Opcode::FPIsInf32:
+    case IR::Opcode::FPIsInf64:
+    case IR::Opcode::FPCmpClass32:
+    case IR::Opcode::SLessThan32:
+    case IR::Opcode::SLessThan64:
+    case IR::Opcode::ULessThan32:
+    case IR::Opcode::ULessThan64:
+    case IR::Opcode::IEqual32:
+    case IR::Opcode::IEqual64:
+    case IR::Opcode::SLessThanEqual:
+    case IR::Opcode::ULessThanEqual:
+    case IR::Opcode::SGreaterThan:
+    case IR::Opcode::UGreaterThan:
+    case IR::Opcode::INotEqual32:
+    case IR::Opcode::INotEqual64:
+    case IR::Opcode::SGreaterThanEqual:
+    case IR::Opcode::UGreaterThanEqual:
+        return true;
+    default:
+        return false;
+    }
+}
+
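+// Illustrative mapping: a U32x3 value reports 4 bytes per component here and 3
+// components in GetNumComponentsOfType below; the allocator assigns one
+// register or spill slot per component.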
+size_t GetRegBytesOfType(IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+    case IR::Type::U8:
+        return 1;
+    case IR::Type::U16:
+    case IR::Type::F16:
+    case IR::Type::F16x2:
+    case IR::Type::F16x3:
+    case IR::Type::F16x4:
+        return 2;
+    case IR::Type::U32:
+    case IR::Type::U32x2:
+    case IR::Type::U32x3:
+    case IR::Type::U32x4:
+    case IR::Type::F32:
+    case IR::Type::F32x2:
+    case IR::Type::F32x3:
+    case IR::Type::F32x4:
+    case IR::Type::ScalarReg:
+    case IR::Type::VectorReg:
+        return 4;
+    case IR::Type::U64:
+    case IR::Type::F64:
+    case IR::Type::F64x2:
+    case IR::Type::F64x3:
+    case IR::Type::F64x4:
+    case IR::Type::Attribute:
+    case IR::Type::Patch:
+        return 8;
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return 0;
+}
+
+u8 GetNumComponentsOfType(IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+    case IR::Type::U8:
+    case IR::Type::U16:
+    case IR::Type::F16:
+    case IR::Type::U32:
+    case IR::Type::F32:
+    case IR::Type::U64:
+    case IR::Type::F64:
+    case IR::Type::ScalarReg:
+    case IR::Type::VectorReg:
+    case IR::Type::Attribute:
+    case IR::Type::Patch:
+        return 1;
+    case IR::Type::U32x2:
+    case IR::Type::F32x2:
+    case IR::Type::F16x2:
+    case IR::Type::F64x2:
+        return 2;
+    case IR::Type::U32x3:
+    case IR::Type::F32x3:
+    case IR::Type::F16x3:
+    case IR::Type::F64x3:
+        return 3;
+    case IR::Type::U32x4:
+    case IR::Type::F32x4:
+    case IR::Type::F16x4:
+    case IR::Type::F64x4:
+        return 4;
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return 0;
+}
+
+Reg ResizeRegToType(const Reg& reg, IR::Type type) {
+    ASSERT(reg.getKind() == Operand::Kind::REG);
+    switch (GetRegBytesOfType(type)) {
+    case 1:
+        return reg.cvt8();
+    case 2:
+        return reg.cvt16();
+    case 4:
+        return reg.cvt32();
+    case 8:
+        return reg.cvt64();
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return reg;
+}
+
+void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        Reg tmp = ctx.TempGPReg(false).cvt32();
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else if (src.isMEM() && dst.isXMM()) {
+        c.movss(dst.getReg().cvt128(), src.getAddress());
+    } else if (src.isXMM() && dst.isMEM()) {
+        c.movss(dst.getAddress(), src.getReg().cvt128());
+    } else if (src.isXMM() && dst.isXMM()) {
+        c.movaps(dst.getReg().cvt128(), src.getReg().cvt128());
+    } else {
+        UNREACHABLE_MSG("Unsupported mov float {} {}", src.toString(), dst.toString());
+    }
+}
+
+void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        const Reg64& tmp = ctx.TempGPReg(false);
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else if (src.isMEM() && dst.isXMM()) {
+        c.movsd(dst.getReg().cvt128(), src.getAddress());
+    } else if (src.isXMM() && dst.isMEM()) {
+        c.movsd(dst.getAddress(), src.getReg().cvt128());
+    } else if (src.isXMM() && dst.isXMM()) {
+        c.movapd(dst.getReg().cvt128(), src.getReg().cvt128());
+    } else {
+        UNREACHABLE_MSG("Unsupported mov double {} {}", src.toString(), dst.toString());
+    }
+}
+
+void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        // Match the intermediate register to the memory operand size
+        Reg tmp = ctx.TempGPReg(false).changeBit(src.getBit());
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else {
+        c.mov(dst, src);
+    }
+}
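+
+// MovValue copies an IR value into the operands previously assigned to an
+// instruction (e.g. when materializing phi assignments): register-allocated
+// float components go through MovFloat/MovDouble, integers through MovGP, and
+// immediates are loaded via a scratch GP register when the destination is
+// memory.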
+ UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type())); + break; + } + } else { + for (size_t i = 0; i < src_op.size(); i++) { + MovGP(ctx, dst[i], src_op[i]); + } + } + } else { + CodeGenerator& c = ctx.Code(); + const bool is_mem = dst[0].isMEM(); + Reg64& tmp = ctx.TempGPReg(false); + switch (src.Type()) { + case IR::Type::U1: + c.mov(is_mem ? tmp.cvt8() : dst[0], src.U1()); + break; + case IR::Type::U8: + c.mov(is_mem ? tmp.cvt8() : dst[0], src.U8()); + break; + case IR::Type::U16: + c.mov(is_mem ? tmp.cvt16() : dst[0], src.U16()); + break; + case IR::Type::U32: + c.mov(is_mem ? tmp.cvt32() : dst[0], src.U32()); + break; + case IR::Type::F32: + c.mov(tmp.cvt32(), std::bit_cast(src.F32())); + if (!is_mem) { + c.movd(dst[0].getReg().cvt128(), tmp.cvt32()); + return; + } + break; + case IR::Type::U64: + c.mov(is_mem ? tmp : dst[0], src.U64()); + break; + case IR::Type::F64: + c.mov(tmp, std::bit_cast(src.F64())); + if (!is_mem) { + c.movq(dst[0].getReg().cvt128(), tmp); + return; + } + break; + case IR::Type::ScalarReg: + c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast(src.ScalarReg())); + break; + case IR::Type::VectorReg: + c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast(src.VectorReg())); + break; + case IR::Type::Attribute: + c.mov(is_mem ? tmp : dst[0], std::bit_cast(src.Attribute())); + break; + case IR::Type::Patch: + c.mov(is_mem ? tmp : dst[0], std::bit_cast(src.Patch())); + break; + default: + UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type())); + break; + } + if (is_mem) { + c.mov(dst[0], tmp); + } + } +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h new file mode 100644 index 000000000..2d665653d --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/ir/type.h" + +namespace Shader::Backend::X64 { + +bool IsFloatingType(IR::Type type); +bool IsConditionalOpcode(IR::Opcode opcode); +size_t GetRegBytesOfType(IR::Type type); +u8 GetNumComponentsOfType(IR::Type type); +Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Type type); +void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); +void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); +void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); +void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src); + +} // namespace Shader::Backend::X64 \ No newline at end of file