mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-15 16:18:56 +00:00
shader_recompiler: Better branch detection + more opcodes
This commit is contained in:
@@ -206,9 +206,12 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||
IR::U32 address = ir.Imm32(dword_offset);
|
||||
if (inst_info.index_enable && inst_info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 0)};
|
||||
const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 1)};
|
||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||
address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
|
||||
} else if (inst_info.index_enable) {
|
||||
IR::U32 index{inst.Arg(1)};
|
||||
const IR::U32 index{inst.Arg(1)};
|
||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||
} else if (inst_info.offset_enable) {
|
||||
const IR::U32 offset{inst.Arg(1)};
|
||||
@@ -216,6 +219,17 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
// Builds the coordinate composite used for cube image accesses.
// Reads `s` and `t` as F32 values, subtracts 1.5 from each, and returns them together with
// `z` (passed through unchanged) as a three-element composite.
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
const IR::Value& z) {
|
||||
// The x and y coordinates have to be corrected here,
|
||||
// because v_madak_f32 scales the s and t coordinates and adds 1.5 to them.
|
||||
// The scale value is already forced to 1.0 when handling v_cubema_f32,
|
||||
// so subtracting 1.5 here recovers the original value.
|
||||
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
|
||||
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
|
||||
return ir.CompositeConstruct(x, y, z);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
@@ -256,8 +270,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
|
||||
case AmdGpu::ImageType::Color2DArray:
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -276,6 +291,7 @@ void ResourceTrackingPass(IR::Program& program) {
|
||||
// Most of the time it is float so that is the default. This pass detects float buffer loads
|
||||
// combined with bitcasts and patches them to be integer loads.
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
break;
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
|
||||
continue;
|
||||
|
||||
@@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
|
||||
// Empty tag types derived from FlagTag. They carry no data; they appear as alternatives of the
// Variant type and select the matching Def/SetDef/UndefOpcode overloads.
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
struct VccHiTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
GotoVariable() = default;
|
||||
@@ -43,7 +44,7 @@ struct GotoVariable : FlagTag {
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag>;
|
||||
VccFlagTag, VccLoTag, VccHiTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
@@ -89,6 +90,13 @@ struct DefTable {
|
||||
vcc_lo_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
// Returns the value stored in vcc_hi_flag for `block`.
// Uses operator[] on the map, so a block without an entry gets a default-constructed
// IR::Value inserted and a reference to it is returned.
const IR::Value& Def(IR::Block* block, VccHiTag) {
|
||||
return vcc_hi_flag[block];
|
||||
}
|
||||
// Records `value` in vcc_hi_flag for `block`, replacing any value previously stored
// for that block.
void SetDef(IR::Block* block, VccHiTag, const IR::Value& value) {
|
||||
vcc_hi_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
// Returns the value stored in vcc_flag for `block`.
// Uses operator[] on the map, so a block without an entry gets a default-constructed
// IR::Value inserted and a reference to it is returned.
const IR::Value& Def(IR::Block* block, VccFlagTag) {
|
||||
return vcc_flag[block];
|
||||
}
|
||||
@@ -101,6 +109,7 @@ struct DefTable {
|
||||
ValueMap exec_flag;
|
||||
ValueMap vcc_flag;
|
||||
ValueMap vcc_lo_flag;
|
||||
ValueMap vcc_hi_flag;
|
||||
};
|
||||
|
||||
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
|
||||
@@ -111,6 +120,14 @@ IR::Opcode UndefOpcode(IR::VectorReg) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
// Undef opcode for the VccLoTag variable: IR::Opcode::UndefU32.
// VccLoTag derives from FlagTag, so this exact-match overload is selected in preference to
// the generic FlagTag overload (which yields UndefU1).
IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
// Undef opcode for the VccHiTag variable: IR::Opcode::UndefU32.
// VccHiTag derives from FlagTag, so this exact-match overload is selected in preference to
// the generic FlagTag overload (which yields UndefU1).
IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
// Undef opcode for any FlagTag-derived variable that has no more specific overload:
// IR::Opcode::UndefU1.
IR::Opcode UndefOpcode(const FlagTag&) noexcept {
|
||||
return IR::Opcode::UndefU1;
|
||||
}
|
||||
@@ -281,6 +298,7 @@ private:
|
||||
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||
const IR::Opcode opcode{inst.GetOpcode()};
|
||||
switch (opcode) {
|
||||
case IR::Opcode::SetThreadBitScalarReg:
|
||||
case IR::Opcode::SetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
pass.WriteVariable(reg, block, inst.Arg(1));
|
||||
@@ -306,6 +324,10 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVccHi:
|
||||
pass.WriteVariable(VccHiTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::GetThreadBitScalarReg:
|
||||
case IR::Opcode::GetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
|
||||
@@ -331,6 +353,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVccHi:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user