x64 backend core and context

This commit is contained in:
Lander Gallastegi 2025-04-01 10:12:23 +02:00 committed by Lander Gallastegi
parent 7545ae33b1
commit 4224a95583
8 changed files with 947 additions and 2 deletions

View File

@@ -899,8 +899,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
if (ARCHITECTURE STREQUAL "x86_64")
set(SHADER_RECOMPILER ${SHADER_RECOMPILER}
src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
src/shader_recompiler/backend/asm_x64/emit_x64.cpp
src/shader_recompiler/backend/asm_x64/emit_x64.h
src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
src/shader_recompiler/backend/asm_x64/x64_emit_context.h
src/shader_recompiler/backend/asm_x64/x64_utils.cpp
src/shader_recompiler/backend/asm_x64/x64_utils.h)
endif()
set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp

View File

@@ -1,12 +1,163 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/func_traits.h"
#include "shader_recompiler/backend/asm_x64/emit_x64.h"
#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h"
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
namespace Shader::Backend::X64 {
using namespace Xbyak;
using namespace Xbyak::util;
static void TestCondition(EmitContext& ctx, const IR::Inst* ref) {
IR::Value cond = ref->Arg(0);
Operand& op = ctx.Def(cond)[0];
Reg8 tmp = op.isREG() ? op.getReg().cvt8() : ctx.TempGPReg(false).cvt8();
if (!op.isREG()) {
ctx.Code().mov(tmp, op);
}
ctx.Code().test(tmp, tmp);
}
template <typename ArgType>
ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
if constexpr (std::is_same_v<ArgType, Operands>) {
return ctx.Def(arg);
} else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
return arg;
} else if constexpr (std::is_same_v<ArgType, u32>) {
return arg.U32();
} else if constexpr (std::is_same_v<ArgType, u64>) {
return arg.U64();
} else if constexpr (std::is_same_v<ArgType, bool>) {
return arg.U1();
} else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
return arg.Attribute();
} else if constexpr (std::is_same_v<ArgType, IR::ScalarReg>) {
return arg.ScalarReg();
} else if constexpr (std::is_same_v<ArgType, IR::VectorReg>) {
return arg.VectorReg();
} else if constexpr (std::is_same_v<ArgType, const char*>) {
return arg.StringLiteral();
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
return arg.Patch();
}
UNREACHABLE();
}
template <auto func, bool is_first_arg_inst, bool has_dest, size_t... I>
static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
using Traits = Common::FuncTraits<decltype(func)>;
if constexpr (has_dest) {
if constexpr (is_first_arg_inst) {
func(ctx, inst, ctx.Def(inst),
Arg<typename Traits::template ArgType<I + 3>>(ctx, inst->Arg(I))...);
} else {
func(ctx, ctx.Def(inst),
Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
}
} else {
if constexpr (is_first_arg_inst) {
func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
} else {
func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
}
}
}
template <auto func, bool has_dest>
static void Invoke(EmitContext& ctx, IR::Inst* inst) {
using Traits = Common::FuncTraits<decltype(func)>;
static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
if constexpr (Traits::NUM_ARGS == 1) {
Invoke<func, false, false>(ctx, inst, std::make_index_sequence<0>{});
} else {
using FirstArgType = typename Traits::template ArgType<1>;
static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
static constexpr size_t num_inst_args = Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1);
if constexpr (num_inst_args > 0 && has_dest) {
Invoke<func, is_first_arg_inst, true>(ctx, inst,
std::make_index_sequence<num_inst_args - 1>{});
} else {
Invoke<func, is_first_arg_inst, false>(ctx, inst,
std::make_index_sequence<num_inst_args>{});
}
}
}
static void EmitInst(EmitContext& ctx, IR::Inst* inst) {
switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...) \
case IR::Opcode::name: \
Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst); \
return;
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
}
UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
}
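The EmitInst/Invoke machinery above recovers each emitter's parameter types through Common::FuncTraits so IR arguments can be unpacked positionally. Below is a minimal standalone sketch of that idea; FuncTraits is re-implemented in stripped-down form and EmitIAdd32 is a hypothetical emitter, not the backend's real API:

#include <cstddef>
#include <cstdint>
#include <tuple>
#include <utility>

template <typename Func>
struct FuncTraits;

template <typename R, typename... Args>
struct FuncTraits<R (*)(Args...)> {
    static constexpr size_t NUM_ARGS = sizeof...(Args);
    template <size_t I>
    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};

struct Ctx {}; // stand-in for EmitContext
struct Value { // stand-in for IR::Value
    uint64_t raw;
};

// Hypothetical emitter with the (ctx, dest, args...) shape the dispatcher expects.
static void EmitIAdd32(Ctx&, uint32_t& dest, uint32_t lhs, uint32_t rhs) {
    dest = lhs + rhs;
}

// Converts the I-th IR argument to whatever type the emitter's signature asks for.
template <typename ArgType>
static ArgType Arg(const Value& v) {
    return static_cast<ArgType>(v.raw);
}

template <auto func, size_t... I>
static void Invoke(Ctx& ctx, uint32_t& dest, const Value* args, std::index_sequence<I...>) {
    using Traits = FuncTraits<decltype(func)>;
    // ArgType<I + 2> skips the ctx and dest parameters, like the has_dest path above.
    func(ctx, dest, Arg<typename Traits::template ArgType<I + 2>>(args[I])...);
}

int main() {
    Ctx ctx;
    uint32_t dest = 0;
    const Value args[] = {{2}, {3}};
    Invoke<&EmitIAdd32>(ctx, dest, args, std::make_index_sequence<2>{});
    return dest == 5 ? 0 : 1; // exits 0 when the dispatch worked
}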
static void Traverse(EmitContext& ctx, const IR::Program& program) {
CodeGenerator& c = ctx.Code();
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
ctx.ResetTempRegs();
switch (node.type) {
case IR::AbstractSyntaxNode::Type::Block: {
IR::Block* block = node.data.block;
c.L(ctx.BlockLabel(block));
for (IR::Inst& inst : *block) {
EmitInst(ctx, &inst);
}
const auto& phi_assignments = ctx.PhiAssignments(block);
if (phi_assignments) {
for (const auto& [phi, value] : phi_assignments->get()) {
MovValue(ctx, ctx.Def(phi), value);
}
}
break;
}
case IR::AbstractSyntaxNode::Type::If: {
IR::Inst* ref = node.data.if_node.cond.InstRecursive();
Label& merge = ctx.BlockLabel(node.data.if_node.merge);
TestCondition(ctx, ref);
c.jz(merge);
break;
}
case IR::AbstractSyntaxNode::Type::Repeat: {
IR::Inst* ref = node.data.repeat.cond.InstRecursive();
Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
TestCondition(ctx, ref);
c.jnz(loop_header);
break;
}
case IR::AbstractSyntaxNode::Type::Break: {
IR::Inst* ref = node.data.break_node.cond.InstRecursive();
Label& merge = ctx.BlockLabel(node.data.break_node.merge);
TestCondition(ctx, ref);
c.jz(merge);
break;
}
case IR::AbstractSyntaxNode::Type::Return: {
c.jmp(ctx.EndLabel());
break;
}
case IR::AbstractSyntaxNode::Type::Unreachable: {
c.int3();
break;
}
case IR::AbstractSyntaxNode::Type::Loop:
case IR::AbstractSyntaxNode::Type::EndIf:
break;
}
}
}
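Traverse lowers structured control flow by testing the materialized condition byte and branching to a block label: jz to the merge label for If and Break, jnz back to the loop header for Repeat. A standalone xbyak sketch of the If shape (assumes xbyak is available; the constant values are placeholders):

#include <xbyak/xbyak.h>

// Same shape Traverse emits for an If node: test the condition byte, jz to the
// merge label when it is zero, otherwise fall through into the "then" block.
struct IfShape : Xbyak::CodeGenerator {
    IfShape() {
        using namespace Xbyak::util;
        Xbyak::Label merge;
        mov(eax, 1); // result when the branch is skipped
        mov(cl, 0);  // condition value (0 = false)
        test(cl, cl);
        jz(merge);   // condition is false: jump straight to the merge point
        mov(eax, 2); // "then" block body
        L(merge);
        ret();
    }
};

int main() {
    IfShape gen;
    return gen.getCode<int (*)()>()() == 1 ? 0 : 1; // returns 1 because cl was 0
}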
void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) {
EmitContext context(program, c);
Traverse(context, program);
context.Code().L(context.EndLabel());
context.Epilogue();
}
} // namespace Shader::Backend::X64

View File

@@ -0,0 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h"
namespace Shader::Backend::X64 {}

View File

@@ -0,0 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
namespace Shader::Backend::X64 {}

View File

@@ -0,0 +1,353 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
using namespace Xbyak;
using namespace Xbyak::util;
namespace Shader::Backend::X64 {
EmitContext::EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_)
: program(program_), code(code_) {
for (IR::Block* block : program.blocks) {
block_labels[block] = {};
}
AllocateRegisters();
}
Reg64& EmitContext::TempGPReg(bool reserve) {
ASSERT(temp_gp_reg_index < temp_gp_regs.size());
u64 idx = temp_gp_reg_index;
if (reserve) {
temp_gp_reg_index++;
}
Reg64& reg = temp_gp_regs[idx];
if (idx >= num_scratch_gp_regs &&
std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
preserved_regs.push_back(reg);
code.push(reg);
}
return reg;
}
Xmm& EmitContext::TempXmmReg(bool reserve) {
ASSERT(temp_xmm_reg_index < temp_xmm_regs.size());
u64 idx = temp_xmm_reg_index;
if (reserve) {
temp_xmm_reg_index++;
}
Xmm& reg = temp_xmm_regs[idx];
if (idx >= num_scratch_xmm_regs &&
std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
preserved_regs.push_back(reg);
code.sub(rsp, 16);
code.movdqu(ptr[rsp], reg);
}
return reg;
}
Operands EmitContext::Def(IR::Inst* inst) {
return inst_to_operands.at(inst);
}
Operands EmitContext::Def(const IR::Value& value) {
if (!value.IsImmediate()) {
return Def(value.InstRecursive());
}
Operands operands;
Reg64& tmp = TempGPReg(false);
switch (value.Type()) {
case IR::Type::U1:
operands.push_back(TempGPReg().cvt8());
code.mov(operands.back(), value.U1());
break;
case IR::Type::U8:
operands.push_back(TempGPReg().cvt8());
code.mov(operands.back(), value.U8());
break;
case IR::Type::U16:
operands.push_back(TempGPReg().cvt16());
code.mov(operands.back(), value.U16());
break;
case IR::Type::U32:
operands.push_back(TempGPReg().cvt32());
code.mov(operands.back(), value.U32());
break;
case IR::Type::F32: {
code.mov(tmp.cvt32(), std::bit_cast<u32>(value.F32()));
Xmm& xmm32 = TempXmmReg();
code.movd(xmm32, tmp.cvt32());
operands.push_back(xmm32);
break;
}
case IR::Type::U64:
operands.push_back(TempGPReg());
code.mov(operands.back(), value.U64());
break;
case IR::Type::F64: {
code.mov(tmp, std::bit_cast<u64>(value.F64()));
Xmm& xmm64 = TempXmmReg();
code.movq(xmm64, tmp);
operands.push_back(xmm64);
break;
}
case IR::Type::ScalarReg:
operands.push_back(TempGPReg().cvt32());
code.mov(operands.back(), std::bit_cast<u32>(value.ScalarReg()));
break;
case IR::Type::VectorReg:
operands.push_back(TempGPReg().cvt32());
code.mov(operands.back(), std::bit_cast<u32>(value.VectorReg()));
break;
case IR::Type::Attribute:
operands.push_back(TempGPReg());
code.mov(operands.back(), std::bit_cast<u64>(value.Attribute()));
break;
case IR::Type::Patch:
operands.push_back(TempGPReg());
code.mov(operands.back(), std::bit_cast<u64>(value.Patch()));
break;
default:
UNREACHABLE_MSG("Unsupported value type: %s", IR::NameOf(value.Type()));
break;
}
return operands;
}
std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
EmitContext::PhiAssignments(IR::Block* block) const {
auto it = phi_assignments.find(block);
if (it != phi_assignments.end()) {
return std::cref(it->second);
}
return std::nullopt;
}
void EmitContext::ResetTempRegs() {
temp_gp_reg_index = 0;
temp_xmm_reg_index = 0;
}
void EmitContext::Prologue() {
if (inst_stack_space > 0) {
code.sub(rsp, inst_stack_space);
code.mov(r11, rsp);
}
}
void EmitContext::Epilogue() {
for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) {
Reg& reg = *it;
if (reg.isXMM()) {
code.movdqu(reg.cvt128(), ptr[rsp]);
code.add(rsp, 16);
} else {
code.pop(reg);
}
}
preserved_regs.clear();
if (inst_stack_space > 0) {
code.add(rsp, inst_stack_space);
}
}
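TempGPReg and TempXmmReg preserve a callee-saved register lazily the first time it is handed out, and Epilogue undoes that in reverse order: GPRs via push/pop, XMM registers through a 16-byte stack slot. A standalone xbyak sketch of that save/restore pattern (assumes xbyak; the body is a deliberate no-op):

#include <xbyak/xbyak.h>

// Save/restore pattern used by TempGPReg/TempXmmReg and Epilogue: push a
// callee-saved GPR, spill an XMM register to a 16-byte stack slot, then
// restore both in reverse order before returning.
struct Frame : Xbyak::CodeGenerator {
    Frame() {
        using namespace Xbyak::util;
        push(rbx);              // preserve a callee-saved GPR
        sub(rsp, 16);           // reserve one 16-byte XMM slot
        movdqu(ptr[rsp], xmm8); // spill the XMM register
        // ... the generated shader body would go here ...
        movdqu(xmm8, ptr[rsp]); // restore in reverse order
        add(rsp, 16);
        pop(rbx);
        ret();
    }
};

int main() {
    Frame frame;
    frame.getCode<void (*)()>()(); // the body is a no-op, so this simply returns
    return 0;
}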
void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
ActiveIntervalList& active_intervals) {
const auto get_operand = [&](IR::Inst* inst) -> Operand {
size_t current_sp = inst_stack_space;
if (ctx.free_stack_slots.empty()) {
inst_stack_space += 8;
} else {
current_sp = ctx.free_stack_slots.back();
ctx.free_stack_slots.pop_back();
}
switch (GetRegBytesOfType(inst->Type())) {
case 1:
return byte[r11 + current_sp];
case 2:
return word[r11 + current_sp];
case 4:
return dword[r11 + current_sp];
case 8:
return qword[r11 + current_sp];
default:
UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst->Type()));
return {};
}
};
auto spill_candidate = std::max_element(
active_intervals.begin(), active_intervals.end(),
[](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; });
if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) {
inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst);
ctx.active_spill_intervals.push_back(interval);
} else {
Operands& operands = inst_to_operands[spill_candidate->inst];
Reg reg = operands[spill_candidate->component].getReg();
inst_to_operands[interval.inst][interval.component] =
reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst->Type());
operands[spill_candidate->component] = get_operand(spill_candidate->inst);
ctx.active_spill_intervals.push_back(*spill_candidate);
*spill_candidate = interval;
}
}
void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& insts) {
IR::Inst* inst = interval.inst;
size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
interval.start = dist;
interval.end = dist;
for (const auto& use : inst->Uses()) {
if (use.user->GetOpcode() == IR::Opcode::Phi) {
// We assign the value at the end of the phi block
IR::Inst& last_inst = use.user->PhiBlock(use.operand)->back();
dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &last_inst));
interval.start = std::min(interval.start, dist);
interval.end = std::max(interval.end, dist);
} else {
dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), use.user));
interval.end = std::max(interval.end, dist);
}
}
if (inst->GetOpcode() == IR::Opcode::Phi) {
for (size_t i = 0; i < inst->NumArgs(); i++) {
IR::Block* block = inst->PhiBlock(i);
dist =
std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &block->back()));
interval.start = std::min(interval.start, dist);
interval.end = std::max(interval.end, dist);
phi_assignments[block].emplace_back(inst, inst->Arg(i));
}
}
}
// Register utilization:
// Instruction registers:
// General purpose registers: rcx, rdx, rsi, r8, r9, r10
// XMM registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
//
// Value / temporary registers:
// General purpose registers: rax (scratch), rbx, r12, r13, r14, r15
// XMM registers: xmm7 (scratch), xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
//
// r11: Stack pointer for spilled instructions
// rdi: User data pointer
// rsp: Stack pointer
//
// Instruction registers that end up unused are repurposed as temporary registers
void EmitContext::AllocateRegisters() {
const std::array<Reg64, 6> initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
const std::array<Xmm, 7> initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
const std::array<Reg64, 6> initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
const std::array<Xmm, 9> initial_xmm_temp_regs = {xmm7, xmm8, xmm9, xmm10, xmm11,
xmm12, xmm13, xmm14, xmm15};
boost::container::small_vector<InstInterval, 64> intervals;
FlatInstList insts;
// We copy the instructions to a flat list for faster iteration
for (IR::Block* block : program.blocks) {
insts.reserve(insts.size() + block->size());
for (IR::Inst& inst : *block) {
insts.push_back(&inst);
}
}
for (IR::Inst* inst : insts) {
if (inst->GetOpcode() == IR::Opcode::ConditionRef || inst->Type() == IR::Type::Void) {
continue;
}
intervals.emplace_back(inst, 0, 0);
AdjustInstInterval(intervals.back(), insts);
}
std::sort(intervals.begin(), intervals.end(),
[](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
RegAllocContext ctx;
ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(),
initial_gp_inst_regs.end());
ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(),
initial_xmm_inst_regs.end());
boost::container::static_vector<Reg64, 6> unused_gp_inst_regs;
boost::container::static_vector<Xmm, 7> unused_xmm_inst_regs;
unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
ctx.free_gp_regs.end());
unused_xmm_inst_regs.insert(unused_xmm_inst_regs.end(), ctx.free_xmm_regs.begin(),
ctx.free_xmm_regs.end());
for (const InstInterval& interval : intervals) {
// Free old interval resources
for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
if (it->end <= interval.start) {
Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
ctx.free_gp_regs.push_back(reg);
it = ctx.active_gp_intervals.erase(it);
} else {
++it;
}
}
for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
if (it->end <= interval.start) {
Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
ctx.free_xmm_regs.push_back(reg);
it = ctx.active_xmm_intervals.erase(it);
} else {
++it;
}
}
for (auto it = ctx.active_spill_intervals.begin();
it != ctx.active_spill_intervals.end();) {
if (it->end <= interval.start) {
const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
ctx.free_stack_slots.push_back(addr.getDisp());
it = ctx.active_spill_intervals.erase(it);
} else {
++it;
}
}
u8 num_components = GetNumComponentsOfType(interval.inst->Type());
bool is_floating = IsFloatingType(interval.inst->Type());
if (is_floating) {
for (size_t i = 0; i < num_components; ++i) {
ActiveInstInterval active(interval, i);
if (!ctx.free_xmm_regs.empty()) {
Xmm& reg = ctx.free_xmm_regs.back();
ctx.free_xmm_regs.pop_back();
inst_to_operands[active.inst][active.component] = reg;
unused_xmm_inst_regs.erase(
std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
unused_xmm_inst_regs.end());
ctx.active_xmm_intervals.push_back(active);
} else {
SpillInst(ctx, active, ctx.active_xmm_intervals);
}
}
} else {
for (size_t i = 0; i < num_components; ++i) {
ActiveInstInterval active(interval, i);
if (!ctx.free_gp_regs.empty()) {
Reg64& reg = ctx.free_gp_regs.back();
ctx.free_gp_regs.pop_back();
inst_to_operands[active.inst][active.component] =
ResizeRegToType(reg, active.inst->Type());
unused_gp_inst_regs.erase(
std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
unused_gp_inst_regs.end());
ctx.active_gp_intervals.push_back(active);
} else {
SpillInst(ctx, active, ctx.active_gp_intervals);
}
}
}
}
temp_gp_regs.insert(temp_gp_regs.end(), unused_gp_inst_regs.begin(), unused_gp_inst_regs.end());
temp_xmm_regs.insert(temp_xmm_regs.end(), unused_xmm_inst_regs.begin(),
unused_xmm_inst_regs.end());
num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch
num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch
temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(),
initial_gp_temp_regs.end());
temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(),
initial_xmm_temp_regs.end());
}
} // namespace Shader::Backend::X64
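AllocateRegisters is a linear scan over instruction live intervals: intervals are processed in order of their start point, expired intervals return their registers to the free pool, and when the pool is empty the interval that lives longest is spilled, mirroring SpillInst. A compact standalone sketch of that core loop, using hypothetical types rather than the emitter's data structures:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Interval {
    int start, end;
    int reg = -1; // assigned register index, or -1 when the value lives on the stack
};

// Linear scan over intervals sorted by start point, with num_regs physical registers.
void LinearScan(std::vector<Interval>& intervals, int num_regs) {
    std::sort(intervals.begin(), intervals.end(),
              [](const Interval& a, const Interval& b) { return a.start < b.start; });
    std::vector<int> free_regs;
    for (int r = 0; r < num_regs; ++r) {
        free_regs.push_back(r);
    }
    std::vector<Interval*> active;
    for (Interval& it : intervals) {
        // Expire intervals that ended before this one starts and free their registers.
        std::erase_if(active, [&](Interval* a) {
            if (a->end <= it.start) {
                free_regs.push_back(a->reg);
                return true;
            }
            return false;
        });
        if (!free_regs.empty()) {
            it.reg = free_regs.back();
            free_regs.pop_back();
            active.push_back(&it);
        } else {
            // Spill whichever candidate lives longest, the same heuristic as SpillInst.
            auto victim = std::max_element(
                active.begin(), active.end(),
                [](Interval* a, Interval* b) { return a->end < b->end; });
            if ((*victim)->end > it.end) {
                it.reg = (*victim)->reg;
                (*victim)->reg = -1; // the victim moves to a stack slot
                *victim = &it;
            } // otherwise the new interval itself stays spilled
        }
    }
}

int main() {
    std::vector<Interval> intervals = {{0, 4}, {1, 9}, {2, 3}, {5, 7}};
    LinearScan(intervals, 2);
    for (const Interval& it : intervals) {
        std::printf("start=%d end=%d reg=%d\n", it.start, it.end, it.reg);
    }
    return 0;
}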

View File

@@ -0,0 +1,113 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <functional>
#include <optional>
#include <boost/container/flat_map.hpp>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/ir/program.h"
namespace Shader::Backend::X64 {
using Operands = boost::container::static_vector<Xbyak::Operand, 4>;
class EmitContext {
public:
static constexpr size_t NumGPRegs = 16;
static constexpr size_t NumXmmRegs = 16;
using PhiAssignmentList = boost::container::small_vector<std::pair<IR::Inst*, IR::Value>, 4>;
EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_);
[[nodiscard]] Xbyak::CodeGenerator& Code() const {
return code;
}
[[nodiscard]] const IR::Program& Program() const {
return program;
}
[[nodiscard]] Xbyak::Label& EndLabel() {
return end_label;
}
[[nodiscard]] Xbyak::Label& BlockLabel(IR::Block* block) {
return block_labels.at(block);
}
[[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
[[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
[[nodiscard]] Operands Def(IR::Inst* inst);
[[nodiscard]] Operands Def(const IR::Value& value);
[[nodiscard]] std::optional<std::reference_wrapper<const EmitContext::PhiAssignmentList>>
PhiAssignments(IR::Block* block) const;
void ResetTempRegs();
void Prologue();
void Epilogue();
private:
struct InstInterval {
IR::Inst* inst;
size_t start;
size_t end;
};
struct ActiveInstInterval : InstInterval {
size_t component;
ActiveInstInterval(const InstInterval& interval, size_t component_)
: InstInterval(interval), component(component_) {}
};
using ActiveIntervalList = boost::container::small_vector<ActiveInstInterval, 8>;
struct RegAllocContext {
boost::container::static_vector<Xbyak::Reg64, NumGPRegs> free_gp_regs;
boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> free_xmm_regs;
boost::container::small_vector<size_t, 8> free_stack_slots;
ActiveIntervalList active_gp_intervals;
ActiveIntervalList active_xmm_intervals;
ActiveIntervalList active_spill_intervals;
};
using FlatInstList = boost::container::small_vector<IR::Inst*, 64>;
const IR::Program& program;
Xbyak::CodeGenerator& code;
// Map of blocks to their phi assignments
boost::container::small_flat_map<IR::Block*, PhiAssignmentList, 8> phi_assignments;
// Map of instructions to their operands
boost::container::small_flat_map<IR::Inst*, Operands, 64> inst_to_operands;
// Space used for spilled instructions
size_t inst_stack_space = 0;
// Temporary register allocation
boost::container::static_vector<Xbyak::Reg64, NumGPRegs> temp_gp_regs;
boost::container::static_vector<Xbyak::Xmm, NumXmmRegs> temp_xmm_regs;
size_t temp_gp_reg_index = 0;
size_t temp_xmm_reg_index = 0;
size_t num_scratch_gp_regs = 0;
size_t num_scratch_xmm_regs = 0;
// Preserved registers
boost::container::static_vector<Xbyak::Reg, NumGPRegs + NumXmmRegs> preserved_regs;
// Labels
boost::container::small_flat_map<IR::Block*, Xbyak::Label, 8> block_labels;
Xbyak::Label end_label;
void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
ActiveIntervalList& active_intervals);
void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts);
void AllocateRegisters();
};
} // namespace Shader::Backend::X64

View File

@@ -0,0 +1,285 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/asm_x64/x64_utils.h"
using namespace Xbyak;
using namespace Xbyak::util;
namespace Shader::Backend::X64 {
bool IsFloatingType(IR::Type type) {
// We store F16 on general purpose registers since we don't do
// arithmetic on them
return type == IR::Type::F32 || type == IR::Type::F64;
}
bool IsConditionalOpcode(IR::Opcode opcode) {
switch (opcode) {
case IR::Opcode::FPOrdEqual32:
case IR::Opcode::FPOrdEqual64:
case IR::Opcode::FPUnordEqual32:
case IR::Opcode::FPUnordEqual64:
case IR::Opcode::FPOrdNotEqual32:
case IR::Opcode::FPOrdNotEqual64:
case IR::Opcode::FPUnordNotEqual32:
case IR::Opcode::FPUnordNotEqual64:
case IR::Opcode::FPOrdLessThan32:
case IR::Opcode::FPOrdLessThan64:
case IR::Opcode::FPUnordLessThan32:
case IR::Opcode::FPUnordLessThan64:
case IR::Opcode::FPOrdGreaterThan32:
case IR::Opcode::FPOrdGreaterThan64:
case IR::Opcode::FPUnordGreaterThan32:
case IR::Opcode::FPUnordGreaterThan64:
case IR::Opcode::FPOrdLessThanEqual32:
case IR::Opcode::FPOrdLessThanEqual64:
case IR::Opcode::FPUnordLessThanEqual32:
case IR::Opcode::FPUnordLessThanEqual64:
case IR::Opcode::FPOrdGreaterThanEqual32:
case IR::Opcode::FPOrdGreaterThanEqual64:
case IR::Opcode::FPUnordGreaterThanEqual32:
case IR::Opcode::FPUnordGreaterThanEqual64:
case IR::Opcode::FPIsNan32:
case IR::Opcode::FPIsNan64:
case IR::Opcode::FPIsInf32:
case IR::Opcode::FPIsInf64:
case IR::Opcode::FPCmpClass32:
case IR::Opcode::SLessThan32:
case IR::Opcode::SLessThan64:
case IR::Opcode::ULessThan32:
case IR::Opcode::ULessThan64:
case IR::Opcode::IEqual32:
case IR::Opcode::IEqual64:
case IR::Opcode::SLessThanEqual:
case IR::Opcode::ULessThanEqual:
case IR::Opcode::SGreaterThan:
case IR::Opcode::UGreaterThan:
case IR::Opcode::INotEqual32:
case IR::Opcode::INotEqual64:
case IR::Opcode::SGreaterThanEqual:
case IR::Opcode::UGreaterThanEqual:
return true;
default:
return false;
}
}
size_t GetRegBytesOfType(IR::Type type) {
switch (type) {
case IR::Type::U1:
case IR::Type::U8:
return 1;
case IR::Type::U16:
case IR::Type::F16:
case IR::Type::F16x2:
case IR::Type::F16x3:
case IR::Type::F16x4:
return 2;
case IR::Type::U32:
case IR::Type::U32x2:
case IR::Type::U32x3:
case IR::Type::U32x4:
case IR::Type::F32:
case IR::Type::F32x2:
case IR::Type::F32x3:
case IR::Type::F32x4:
case IR::Type::ScalarReg:
case IR::Type::VectorReg:
return 4;
case IR::Type::U64:
case IR::Type::F64:
case IR::Type::F64x2:
case IR::Type::F64x3:
case IR::Type::F64x4:
case IR::Type::Attribute:
case IR::Type::Patch:
return 8;
default:
break;
}
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
return 0;
}
u8 GetNumComponentsOfType(IR::Type type) {
switch (type) {
case IR::Type::U1:
case IR::Type::U8:
case IR::Type::U16:
case IR::Type::F16:
case IR::Type::U32:
case IR::Type::F32:
case IR::Type::U64:
case IR::Type::F64:
case IR::Type::ScalarReg:
case IR::Type::VectorReg:
case IR::Type::Attribute:
case IR::Type::Patch:
return 1;
case IR::Type::U32x2:
case IR::Type::F32x2:
case IR::Type::F16x2:
case IR::Type::F64x2:
return 2;
case IR::Type::U32x3:
case IR::Type::F32x3:
case IR::Type::F16x3:
case IR::Type::F64x3:
return 3;
case IR::Type::U32x4:
case IR::Type::F32x4:
case IR::Type::F16x4:
case IR::Type::F64x4:
return 4;
default:
break;
}
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
return 0;
}
Reg ResizeRegToType(const Reg& reg, IR::Type type) {
ASSERT(reg.getKind() == Operand::Kind::REG);
switch (GetRegBytesOfType(type)) {
case 1:
return reg.cvt8();
case 2:
return reg.cvt16();
case 4:
return reg.cvt32();
case 8:
return reg.cvt64();
default:
break;
}
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
return reg;
}
void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src.isMEM() && dst.isMEM()) {
Reg tmp = ctx.TempGPReg(false).cvt32();
c.mov(tmp, src);
c.mov(dst, tmp);
} else if (src.isMEM() && dst.isXMM()) {
c.movss(dst.getReg().cvt128(), src.getAddress());
} else if (src.isXMM() && dst.isMEM()) {
c.movss(dst.getAddress(), src.getReg().cvt128());
} else if (src.isXMM() && dst.isXMM()) {
c.movaps(dst.getReg().cvt128(), src.getReg().cvt128());
} else {
UNREACHABLE_MSG("Unsupported mov float %s %s", src.toString(), dst.toString());
}
}
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src.isMEM() && dst.isMEM()) {
const Reg64& tmp = ctx.TempGPReg(false);
c.mov(tmp, src);
c.mov(dst, tmp);
} else if (src.isMEM() && dst.isXMM()) {
c.movsd(dst.getReg().cvt128(), src.getAddress());
} else if (src.isXMM() && dst.isMEM()) {
c.movsd(dst.getAddress(), src.getReg().cvt128());
} else if (src.isXMM() && dst.isXMM()) {
c.movapd(dst.getReg().cvt128(), src.getReg().cvt128());
} else {
UNREACHABLE_MSG("Unsupported mov double %s %s", src.toString(), dst.toString());
}
}
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
CodeGenerator& c = ctx.Code();
if (src.isMEM() && dst.isMEM()) {
const Reg64& tmp = ctx.TempGPReg(false);
c.mov(tmp, src);
c.mov(dst, tmp);
} else {
c.mov(dst, src);
}
}
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
if (!src.IsImmediate()) {
const Operands& src_op = ctx.Def(src);
if (IsFloatingType(src.Type())) {
switch (GetRegBytesOfType(src.Type())) {
case 4:
for (size_t i = 0; i < src_op.size(); i++) {
MovFloat(ctx, dst[i], src_op[i]);
}
break;
case 8:
for (size_t i = 0; i < src_op.size(); i++) {
MovDouble(ctx, dst[i], src_op[i]);
}
break;
default:
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type()));
break;
}
} else {
for (size_t i = 0; i < src_op.size(); i++) {
MovGP(ctx, dst[i], src_op[i]);
}
}
} else {
CodeGenerator& c = ctx.Code();
const bool is_mem = dst[0].isMEM();
Reg64& tmp = ctx.TempGPReg(false);
switch (src.Type()) {
case IR::Type::U1:
c.mov(is_mem ? tmp.cvt8() : dst[0], src.U1());
break;
case IR::Type::U8:
c.mov(is_mem ? tmp.cvt8() : dst[0], src.U8());
break;
case IR::Type::U16:
c.mov(is_mem ? tmp.cvt16() : dst[0], src.U16());
break;
case IR::Type::U32:
c.mov(is_mem ? tmp.cvt32() : dst[0], src.U32());
break;
case IR::Type::F32:
c.mov(tmp.cvt32(), std::bit_cast<u32>(src.F32()));
if (!is_mem) {
c.movd(dst[0].getReg().cvt128(), tmp.cvt32());
return;
}
break;
case IR::Type::U64:
c.mov(is_mem ? tmp : dst[0], src.U64());
break;
case IR::Type::F64:
c.mov(tmp, std::bit_cast<u64>(src.F64()));
if (!is_mem) {
c.movq(dst[0].getReg().cvt128(), tmp);
return;
}
break;
case IR::Type::ScalarReg:
c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast<u32>(src.ScalarReg()));
break;
case IR::Type::VectorReg:
c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast<u32>(src.VectorReg()));
break;
case IR::Type::Attribute:
c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Attribute()));
break;
case IR::Type::Patch:
c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Patch()));
break;
default:
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type()));
break;
}
if (is_mem) {
c.mov(dst[0], tmp);
}
}
}
} // namespace Shader::Backend::X64
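The Mov* helpers above all special-case memory-to-memory copies because x86 has no mem-to-mem mov encoding; the value is bounced through a temporary register instead. A standalone xbyak sketch of that pattern (assumes xbyak; the buffers and register choice are illustrative):

#include <cstdint>
#include <xbyak/xbyak.h>

// Copies a 32-bit value between two memory operands by bouncing it through a
// register, the same shape MovGP/MovFloat/MovDouble use when both sides are MEM.
struct MemToMem : Xbyak::CodeGenerator {
    MemToMem(const uint32_t* src, uint32_t* dst) {
        using namespace Xbyak::util;
        mov(rax, reinterpret_cast<uintptr_t>(src));
        mov(eax, dword[rax]); // load through the temporary register
        mov(rcx, reinterpret_cast<uintptr_t>(dst));
        mov(dword[rcx], eax); // store from the temporary register
        ret();
    }
};

int main() {
    uint32_t src = 0x1234;
    uint32_t dst = 0;
    MemToMem gen(&src, &dst);
    gen.getCode<void (*)()>()();
    return dst == 0x1234 ? 0 : 1;
}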

View File

@@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
#include "shader_recompiler/ir/type.h"
namespace Shader::Backend::X64 {
bool IsFloatingType(IR::Type type);
bool IsConditionalOpcode(IR::Opcode opcode);
size_t GetRegBytesOfType(IR::Type type);
u8 GetNumComponentsOfType(IR::Type type);
Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Type type);
void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
} // namespace Shader::Backend::X64