mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-02 07:22:24 +00:00
x64 fixes
This commit is contained in:
parent
f516ab2dec
commit
4fec1c7fce
@ -107,7 +107,8 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
|
|||||||
switch (inst->GetOpcode()) {
|
switch (inst->GetOpcode()) {
|
||||||
#define OPCODE(name, result_type, ...) \
|
#define OPCODE(name, result_type, ...) \
|
||||||
case IR::Opcode::name: \
|
case IR::Opcode::name: \
|
||||||
Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst);
|
Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst); \
|
||||||
|
return;
|
||||||
#include "shader_recompiler/ir/opcodes.inc"
|
#include "shader_recompiler/ir/opcodes.inc"
|
||||||
#undef OPCODE
|
#undef OPCODE
|
||||||
}
|
}
|
||||||
@ -138,6 +139,8 @@ void Traverse(EmitContext& ctx, const IR::Program& program) {
|
|||||||
IR::Block* block = node.data.block;
|
IR::Block* block = node.data.block;
|
||||||
c.L(ctx.BlockLabel(block));
|
c.L(ctx.BlockLabel(block));
|
||||||
for (IR::Inst& inst : *block) {
|
for (IR::Inst& inst : *block) {
|
||||||
|
ctx.ResetTempRegs();
|
||||||
|
EmitInst(ctx, &inst);
|
||||||
}
|
}
|
||||||
const auto& phi_assignments = ctx.PhiAssignments(block);
|
const auto& phi_assignments = ctx.PhiAssignments(block);
|
||||||
if (phi_assignments) {
|
if (phi_assignments) {
|
||||||
|
@ -14,7 +14,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg)
|
|||||||
const u32 offset = static_cast<u32>(reg) << 2;
|
const u32 offset = static_cast<u32>(reg) << 2;
|
||||||
Reg& tmp = ctx.TempGPReg();
|
Reg& tmp = ctx.TempGPReg();
|
||||||
ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
|
ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
|
||||||
MovGP( ctx, dest[0], ptr[tmp]);
|
MovGP( ctx, dest[0], dword[tmp]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
|
void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
|
||||||
@ -22,7 +22,7 @@ void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& v
|
|||||||
MovGP(ctx, tmp, offset[0]);
|
MovGP(ctx, tmp, offset[0]);
|
||||||
ctx.Code().shl(tmp, 2);
|
ctx.Code().shl(tmp, 2);
|
||||||
ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
|
ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
|
||||||
MovGP(ctx, ptr[tmp], value[0]);
|
MovGP(ctx, dword[tmp], value[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
|
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
|
||||||
@ -65,9 +65,9 @@ void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base,
|
|||||||
if (offset[0].isMEM()) {
|
if (offset[0].isMEM()) {
|
||||||
ctx.Code().add(tmp, offset[0]);
|
ctx.Code().add(tmp, offset[0]);
|
||||||
} else {
|
} else {
|
||||||
ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]);
|
ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg().cvt64()]);
|
||||||
}
|
}
|
||||||
MovGP(ctx, dest[0], ptr[tmp]);
|
MovGP(ctx, dest[0], dword[tmp]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitReadConstBuffer(EmitContext& ctx) {
|
void EmitReadConstBuffer(EmitContext& ctx) {
|
||||||
|
@ -328,8 +328,8 @@ void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, con
|
|||||||
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitSMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitSMulExt(EmitContext& ctx);
|
||||||
void EmitUMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitUMulExt(EmitContext& ctx);
|
||||||
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
@ -350,9 +350,9 @@ void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op
|
|||||||
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
|
||||||
void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count);
|
void EmitBitFieldInsert(EmitContext& ctx);
|
||||||
void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
|
void EmitBitFieldSExtract(EmitContext& ctx);
|
||||||
void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count);
|
void EmitBitFieldUExtract(EmitContext& ctx);
|
||||||
void EmitBitReverse32(EmitContext& ctx);
|
void EmitBitReverse32(EmitContext& ctx);
|
||||||
void EmitBitCount32(EmitContext& ctx);
|
void EmitBitCount32(EmitContext& ctx);
|
||||||
void EmitBitCount64(EmitContext& ctx);
|
void EmitBitCount64(EmitContext& ctx);
|
||||||
|
@ -125,7 +125,7 @@ Operands EmitContext::Def(const IR::Value& value) {
|
|||||||
code.mov(operands.back(), std::bit_cast<u64>(value.Patch()));
|
code.mov(operands.back(), std::bit_cast<u64>(value.Patch()));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unsupported value type: %s", IR::NameOf(value.Type()));
|
UNREACHABLE_MSG("Unsupported value type: {}", IR::NameOf(value.Type()));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return operands;
|
return operands;
|
||||||
@ -173,17 +173,17 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte
|
|||||||
current_sp += ctx.free_stack_slots.back();
|
current_sp += ctx.free_stack_slots.back();
|
||||||
ctx.free_stack_slots.pop_back();
|
ctx.free_stack_slots.pop_back();
|
||||||
}
|
}
|
||||||
switch (GetRegBytesOfType(inst->Type())) {
|
switch (GetRegBytesOfType(IR::Value(inst))) {
|
||||||
case 8:
|
case 1:
|
||||||
return byte[r11 + current_sp];
|
return byte[r11 + current_sp];
|
||||||
case 16:
|
case 2:
|
||||||
return word[r11 + current_sp];
|
return word[r11 + current_sp];
|
||||||
case 32:
|
case 4:
|
||||||
return dword[r11 + current_sp];
|
return dword[r11 + current_sp];
|
||||||
case 64:
|
case 8:
|
||||||
return qword[r11 + current_sp];
|
return qword[r11 + current_sp];
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unsupported register size: %zu", GetRegBytesOfType(inst->Type()));
|
UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -197,7 +197,7 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte
|
|||||||
Operands& operands = inst_to_operands[spill_candidate->inst];
|
Operands& operands = inst_to_operands[spill_candidate->inst];
|
||||||
Reg reg = operands[spill_candidate->component].getReg();
|
Reg reg = operands[spill_candidate->component].getReg();
|
||||||
inst_to_operands[interval.inst][interval.component] =
|
inst_to_operands[interval.inst][interval.component] =
|
||||||
reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst->Type());
|
reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst);
|
||||||
operands[spill_candidate->component] = get_operand(spill_candidate->inst);
|
operands[spill_candidate->component] = get_operand(spill_candidate->inst);
|
||||||
ctx.active_spill_intervals.push_back(*spill_candidate);
|
ctx.active_spill_intervals.push_back(*spill_candidate);
|
||||||
*spill_candidate = interval;
|
*spill_candidate = interval;
|
||||||
@ -252,8 +252,8 @@ void EmitContext::AllocateRegisters() {
|
|||||||
const std::array<Reg64, 6> initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
|
const std::array<Reg64, 6> initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
|
||||||
const std::array<Xmm, 7> initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
|
const std::array<Xmm, 7> initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
|
||||||
const std::array<Reg64, 6> initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
|
const std::array<Reg64, 6> initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
|
||||||
const std::array<Xmm, 10> initial_xmm_temp_regs = {xmm7, xmm7, xmm8, xmm9, xmm10,
|
const std::array<Xmm, 9> initial_xmm_temp_regs = {xmm7, xmm8, xmm9, xmm10, xmm11,
|
||||||
xmm11, xmm12, xmm13, xmm14, xmm15};
|
xmm12, xmm13, xmm14, xmm15};
|
||||||
|
|
||||||
boost::container::small_vector<InstInterval, 64> intervals;
|
boost::container::small_vector<InstInterval, 64> intervals;
|
||||||
FlatInstList insts;
|
FlatInstList insts;
|
||||||
@ -274,10 +274,10 @@ void EmitContext::AllocateRegisters() {
|
|||||||
std::sort(intervals.begin(), intervals.end(),
|
std::sort(intervals.begin(), intervals.end(),
|
||||||
[](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
|
[](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
|
||||||
RegAllocContext ctx;
|
RegAllocContext ctx;
|
||||||
ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_temp_regs.begin(),
|
ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(),
|
||||||
initial_gp_temp_regs.end());
|
initial_gp_inst_regs.end());
|
||||||
ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_temp_regs.begin(),
|
ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(),
|
||||||
initial_xmm_temp_regs.end());
|
initial_xmm_inst_regs.end());
|
||||||
boost::container::static_vector<Reg64, 6> unused_gp_inst_regs;
|
boost::container::static_vector<Reg64, 6> unused_gp_inst_regs;
|
||||||
boost::container::static_vector<Xmm, 7> unused_xmm_inst_regs;
|
boost::container::static_vector<Xmm, 7> unused_xmm_inst_regs;
|
||||||
unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
|
unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
|
||||||
@ -287,7 +287,7 @@ void EmitContext::AllocateRegisters() {
|
|||||||
for (const InstInterval& interval : intervals) {
|
for (const InstInterval& interval : intervals) {
|
||||||
// Free old interval resources
|
// Free old interval resources
|
||||||
for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
|
for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
|
||||||
if (it->end <= interval.start) {
|
if (it->end < interval.start) {
|
||||||
Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
|
Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
|
||||||
ctx.free_gp_regs.push_back(reg);
|
ctx.free_gp_regs.push_back(reg);
|
||||||
it = ctx.active_gp_intervals.erase(it);
|
it = ctx.active_gp_intervals.erase(it);
|
||||||
@ -296,7 +296,7 @@ void EmitContext::AllocateRegisters() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
|
for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
|
||||||
if (it->end <= interval.start) {
|
if (it->end < interval.start) {
|
||||||
Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
|
Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
|
||||||
ctx.free_xmm_regs.push_back(reg);
|
ctx.free_xmm_regs.push_back(reg);
|
||||||
it = ctx.active_xmm_intervals.erase(it);
|
it = ctx.active_xmm_intervals.erase(it);
|
||||||
@ -306,7 +306,7 @@ void EmitContext::AllocateRegisters() {
|
|||||||
}
|
}
|
||||||
for (auto it = ctx.active_spill_intervals.begin();
|
for (auto it = ctx.active_spill_intervals.begin();
|
||||||
it != ctx.active_spill_intervals.end();) {
|
it != ctx.active_spill_intervals.end();) {
|
||||||
if (it->end <= interval.start) {
|
if (it->end < interval.start) {
|
||||||
const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
|
const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
|
||||||
ctx.free_stack_slots.push_back(addr.getDisp());
|
ctx.free_stack_slots.push_back(addr.getDisp());
|
||||||
it = ctx.active_spill_intervals.erase(it);
|
it = ctx.active_spill_intervals.erase(it);
|
||||||
@ -314,15 +314,17 @@ void EmitContext::AllocateRegisters() {
|
|||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
u8 num_components = GetNumComponentsOfType(interval.inst->Type());
|
u8 num_components = GetNumComponentsOfType(interval.inst);
|
||||||
bool is_floating = IsFloatingType(interval.inst->Type());
|
bool is_floating = IsFloatingType(interval.inst);
|
||||||
|
auto& operands = inst_to_operands[interval.inst];
|
||||||
|
operands.resize(num_components);
|
||||||
if (is_floating) {
|
if (is_floating) {
|
||||||
for (size_t i = 0; i < num_components; ++i) {
|
for (size_t i = 0; i < num_components; ++i) {
|
||||||
ActiveInstInterval active(interval, i);
|
ActiveInstInterval active(interval, i);
|
||||||
if (!ctx.free_xmm_regs.empty()) {
|
if (!ctx.free_xmm_regs.empty()) {
|
||||||
Xmm& reg = ctx.free_xmm_regs.back();
|
Xmm& reg = ctx.free_xmm_regs.back();
|
||||||
ctx.free_xmm_regs.pop_back();
|
ctx.free_xmm_regs.pop_back();
|
||||||
inst_to_operands[active.inst][active.component] = reg;
|
operands[active.component] = reg;
|
||||||
unused_xmm_inst_regs.erase(
|
unused_xmm_inst_regs.erase(
|
||||||
std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
|
std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
|
||||||
unused_xmm_inst_regs.end());
|
unused_xmm_inst_regs.end());
|
||||||
@ -337,8 +339,7 @@ void EmitContext::AllocateRegisters() {
|
|||||||
if (!ctx.free_gp_regs.empty()) {
|
if (!ctx.free_gp_regs.empty()) {
|
||||||
Reg64& reg = ctx.free_gp_regs.back();
|
Reg64& reg = ctx.free_gp_regs.back();
|
||||||
ctx.free_gp_regs.pop_back();
|
ctx.free_gp_regs.pop_back();
|
||||||
inst_to_operands[active.inst][active.component] =
|
operands[active.component] = ResizeRegToType(reg, active.inst);
|
||||||
ResizeRegToType(reg, active.inst->Type());
|
|
||||||
unused_gp_inst_regs.erase(
|
unused_gp_inst_regs.erase(
|
||||||
std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
|
std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg),
|
||||||
unused_gp_inst_regs.end());
|
unused_gp_inst_regs.end());
|
||||||
@ -354,10 +355,10 @@ void EmitContext::AllocateRegisters() {
|
|||||||
unused_xmm_inst_regs.end());
|
unused_xmm_inst_regs.end());
|
||||||
num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch
|
num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch
|
||||||
num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch
|
num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch
|
||||||
temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_inst_regs.begin(),
|
temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(),
|
||||||
initial_gp_inst_regs.end());
|
initial_gp_temp_regs.end());
|
||||||
temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_inst_regs.begin(),
|
temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(),
|
||||||
initial_xmm_inst_regs.end());
|
initial_xmm_temp_regs.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::X64
|
} // namespace Shader::Backend::X64
|
@ -54,7 +54,7 @@ public:
|
|||||||
void PopTempXmmReg();
|
void PopTempXmmReg();
|
||||||
void ResetTempRegs();
|
void ResetTempRegs();
|
||||||
|
|
||||||
[[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;}
|
[[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::rdi;}
|
||||||
|
|
||||||
[[nodiscard]] const Operands& Def(IR::Inst* inst);
|
[[nodiscard]] const Operands& Def(IR::Inst* inst);
|
||||||
[[nodiscard]] Operands Def(const IR::Value& value);
|
[[nodiscard]] Operands Def(const IR::Value& value);
|
||||||
|
@ -8,65 +8,15 @@ using namespace Xbyak::util;
|
|||||||
|
|
||||||
namespace Shader::Backend::X64 {
|
namespace Shader::Backend::X64 {
|
||||||
|
|
||||||
bool IsFloatingType(IR::Type type) {
|
bool IsFloatingType(const IR::Value& value) {
|
||||||
// We store F16 on general purpose registers since we don't do
|
// We store F16 on general purpose registers since we don't do
|
||||||
// arithmetic on them
|
// arithmetic on them
|
||||||
|
IR::Type type = value.Type();
|
||||||
return type == IR::Type::F32 || type == IR::Type::F64;
|
return type == IR::Type::F32 || type == IR::Type::F64;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsConditionalOpcode(IR::Opcode opcode) {
|
size_t GetRegBytesOfType(const IR::Value& value) {
|
||||||
switch (opcode) {
|
switch (value.Type()) {
|
||||||
case IR::Opcode::FPOrdEqual32:
|
|
||||||
case IR::Opcode::FPOrdEqual64:
|
|
||||||
case IR::Opcode::FPUnordEqual32:
|
|
||||||
case IR::Opcode::FPUnordEqual64:
|
|
||||||
case IR::Opcode::FPOrdNotEqual32:
|
|
||||||
case IR::Opcode::FPOrdNotEqual64:
|
|
||||||
case IR::Opcode::FPUnordNotEqual32:
|
|
||||||
case IR::Opcode::FPUnordNotEqual64:
|
|
||||||
case IR::Opcode::FPOrdLessThan32:
|
|
||||||
case IR::Opcode::FPOrdLessThan64:
|
|
||||||
case IR::Opcode::FPUnordLessThan32:
|
|
||||||
case IR::Opcode::FPUnordLessThan64:
|
|
||||||
case IR::Opcode::FPOrdGreaterThan32:
|
|
||||||
case IR::Opcode::FPOrdGreaterThan64:
|
|
||||||
case IR::Opcode::FPUnordGreaterThan32:
|
|
||||||
case IR::Opcode::FPUnordGreaterThan64:
|
|
||||||
case IR::Opcode::FPOrdLessThanEqual32:
|
|
||||||
case IR::Opcode::FPOrdLessThanEqual64:
|
|
||||||
case IR::Opcode::FPUnordLessThanEqual32:
|
|
||||||
case IR::Opcode::FPUnordLessThanEqual64:
|
|
||||||
case IR::Opcode::FPOrdGreaterThanEqual32:
|
|
||||||
case IR::Opcode::FPOrdGreaterThanEqual64:
|
|
||||||
case IR::Opcode::FPUnordGreaterThanEqual32:
|
|
||||||
case IR::Opcode::FPUnordGreaterThanEqual64:
|
|
||||||
case IR::Opcode::FPIsNan32:
|
|
||||||
case IR::Opcode::FPIsNan64:
|
|
||||||
case IR::Opcode::FPIsInf32:
|
|
||||||
case IR::Opcode::FPIsInf64:
|
|
||||||
case IR::Opcode::FPCmpClass32:
|
|
||||||
case IR::Opcode::SLessThan32:
|
|
||||||
case IR::Opcode::SLessThan64:
|
|
||||||
case IR::Opcode::ULessThan32:
|
|
||||||
case IR::Opcode::ULessThan64:
|
|
||||||
case IR::Opcode::IEqual32:
|
|
||||||
case IR::Opcode::IEqual64:
|
|
||||||
case IR::Opcode::SLessThanEqual:
|
|
||||||
case IR::Opcode::ULessThanEqual:
|
|
||||||
case IR::Opcode::SGreaterThan:
|
|
||||||
case IR::Opcode::UGreaterThan:
|
|
||||||
case IR::Opcode::INotEqual32:
|
|
||||||
case IR::Opcode::INotEqual64:
|
|
||||||
case IR::Opcode::SGreaterThanEqual:
|
|
||||||
case IR::Opcode::UGreaterThanEqual:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t GetRegBytesOfType(IR::Type type) {
|
|
||||||
switch (type) {
|
|
||||||
case IR::Type::U1:
|
case IR::Type::U1:
|
||||||
case IR::Type::U8:
|
case IR::Type::U8:
|
||||||
return 1;
|
return 1;
|
||||||
@ -98,12 +48,12 @@ size_t GetRegBytesOfType(IR::Type type) {
|
|||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
|
UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 GetNumComponentsOfType(IR::Type type) {
|
u8 GetNumComponentsOfType(const IR::Value& value) {
|
||||||
switch (type) {
|
switch (value.Type()) {
|
||||||
case IR::Type::U1:
|
case IR::Type::U1:
|
||||||
case IR::Type::U8:
|
case IR::Type::U8:
|
||||||
case IR::Type::U16:
|
case IR::Type::U16:
|
||||||
@ -135,13 +85,13 @@ u8 GetNumComponentsOfType(IR::Type type) {
|
|||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
|
UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Reg ResizeRegToType(const Reg& reg, IR::Type type) {
|
Reg ResizeRegToType(const Reg& reg, const IR::Value& value) {
|
||||||
ASSERT(reg.getKind() == Operand::Kind::REG);
|
ASSERT(reg.getKind() == Operand::Kind::REG);
|
||||||
switch (GetRegBytesOfType(type)) {
|
switch (GetRegBytesOfType(value)) {
|
||||||
case 1:
|
case 1:
|
||||||
return reg.cvt8();
|
return reg.cvt8();
|
||||||
case 2:
|
case 2:
|
||||||
@ -153,7 +103,7 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) {
|
|||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type));
|
UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type()));
|
||||||
return reg;
|
return reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -173,7 +123,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
|
|||||||
} else if (src.isXMM() && dst.isXMM()) {
|
} else if (src.isXMM() && dst.isXMM()) {
|
||||||
c.movaps(dst.getReg().cvt128(), src.getReg().cvt128());
|
c.movaps(dst.getReg().cvt128(), src.getReg().cvt128());
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Unsupported mov float %s %s", src.toString(), dst.toString());
|
UNREACHABLE_MSG("Unsupported mov float {} {}", src.toString(), dst.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,7 +143,7 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand
|
|||||||
} else if (src.isXMM() && dst.isXMM()) {
|
} else if (src.isXMM() && dst.isXMM()) {
|
||||||
c.movapd(dst.getReg().cvt128(), src.getReg().cvt128());
|
c.movapd(dst.getReg().cvt128(), src.getReg().cvt128());
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Unsupported mov double %s %s", src.toString(), dst.toString());
|
UNREACHABLE_MSG("Unsupported mov double {} {}", src.toString(), dst.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,26 +152,27 @@ void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& sr
|
|||||||
if (src == dst) {
|
if (src == dst) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
|
Reg tmp = dst.isMEM() ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
|
||||||
if (src.getBit() == dst.getBit()) {
|
if (src.getBit() < dst.getBit() && !src.isBit(32)) {
|
||||||
c.mov(tmp, src);
|
|
||||||
} else if (src.getBit() < dst.getBit()) {
|
|
||||||
c.movzx(tmp, src);
|
c.movzx(tmp, src);
|
||||||
} else {
|
} else if (src.getBit() > dst.getBit()) {
|
||||||
Operand src_tmp = src;
|
Operand src_tmp = src;
|
||||||
src_tmp.setBit(dst.getBit());
|
src_tmp.setBit(dst.getBit());
|
||||||
c.mov(tmp, src_tmp);
|
c.mov(tmp, src_tmp);
|
||||||
|
} else {
|
||||||
|
c.mov(tmp, src);
|
||||||
}
|
}
|
||||||
if (src.isMEM() && dst.isMEM()) {
|
if (dst.isMEM()) {
|
||||||
c.mov(dst, tmp);
|
c.mov(dst, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
|
void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
|
||||||
if (!src.IsImmediate()) {
|
if (!src.IsImmediate()) {
|
||||||
const Operands& src_op = ctx.Def(src);
|
IR::Inst* src_inst = src.InstRecursive();
|
||||||
if (IsFloatingType(src.Type())) {
|
const Operands& src_op = ctx.Def(src_inst);
|
||||||
switch (GetRegBytesOfType(src.Type())) {
|
if (IsFloatingType(src)) {
|
||||||
|
switch (GetRegBytesOfType(src)) {
|
||||||
case 32:
|
case 32:
|
||||||
for (size_t i = 0; i < src_op.size(); i++) {
|
for (size_t i = 0; i < src_op.size(); i++) {
|
||||||
MovFloat(ctx, dst[i], src_op[i]);
|
MovFloat(ctx, dst[i], src_op[i]);
|
||||||
@ -233,7 +184,7 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type()));
|
UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -288,7 +239,7 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
|
|||||||
c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Patch()));
|
c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Patch()));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type()));
|
UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (is_mem) {
|
if (is_mem) {
|
||||||
|
@ -10,11 +10,10 @@
|
|||||||
|
|
||||||
namespace Shader::Backend::X64 {
|
namespace Shader::Backend::X64 {
|
||||||
|
|
||||||
bool IsFloatingType(IR::Type type);
|
bool IsFloatingType(const IR::Value& value);
|
||||||
bool IsConditionalOpcode(IR::Opcode opcode);
|
size_t GetRegBytesOfType(const IR::Value& value);
|
||||||
size_t GetRegBytesOfType(IR::Type type);
|
u8 GetNumComponentsOfType(const IR::Value& value);
|
||||||
u8 GetNumComponentsOfType(IR::Type type);
|
Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, const IR::Value& value);
|
||||||
Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Type type);
|
|
||||||
void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
||||||
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
||||||
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
|
||||||
@ -22,4 +21,20 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
|
|||||||
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
|
void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
|
||||||
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
|
void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
|
||||||
|
|
||||||
|
inline bool IsFloatingType(IR::Inst* inst) {
|
||||||
|
return IsFloatingType(IR::Value(inst));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t GetRegBytesOfType(IR::Inst* inst) {
|
||||||
|
return GetRegBytesOfType(IR::Value(inst));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline u8 GetNumComponentsOfType(IR::Inst* inst) {
|
||||||
|
return GetNumComponentsOfType(IR::Value(inst));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Inst* inst) {
|
||||||
|
return ResizeRegToType(reg, IR::Value(inst));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::X64
|
} // namespace Shader::Backend::X64
|
@ -214,7 +214,8 @@ static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools&
|
|||||||
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
||||||
|
|
||||||
if (!pass_info.all_readconsts.empty()) {
|
if (!pass_info.all_readconsts.empty()) {
|
||||||
GenerateSrtReadConstsSubProgram(program, pass_info, pools);
|
IR::Program sub_program = GenerateSrtReadConstsSubProgram(program, pass_info, pools);
|
||||||
|
Backend::X64::EmitX64(sub_program, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
|
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
|
||||||
|
Loading…
Reference in New Issue
Block a user