mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 00:42:48 +00:00
refactor and handle wider DS instructions
This commit is contained in:
parent
fffe27a135
commit
634b04c517
@ -574,6 +574,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a composite value from a runtime-sized list of elements by forwarding to the
/// fixed-arity CompositeConstruct overloads.
///
/// @param elements Components of the composite, in order. Exactly 2, 3 or 4 are accepted.
/// @return The value produced by the matching fixed-arity overload.
///
/// Any other element count (including 0 and 1) is treated as unreachable.
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    switch (elements.size()) {
    case 2:
        return CompositeConstruct(elements[0], elements[1]);
    case 3:
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    case 4:
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    default:
        // This branch is also taken for 0 or 1 elements, so the diagnostic must describe the
        // accepted range rather than claim the count was too large.
        UNREACHABLE_MSG("Composite construct requires between 2 and 4 elements");
    }
}
|
||||||
|
|
||||||
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
||||||
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
||||||
if (element >= limit) {
|
if (element >= limit) {
|
||||||
|
@ -148,6 +148,8 @@ public:
|
|||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
||||||
const Value& e4);
|
const Value& e4);
|
||||||
|
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
|
||||||
|
|
||||||
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
||||||
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
||||||
|
|
||||||
|
@ -257,7 +257,7 @@ private:
|
|||||||
|
|
||||||
enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };
|
enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };
|
||||||
|
|
||||||
static AttributeRegion FindRegionKind(IR::Inst* ring_access, const Shader::Info& info,
|
static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
|
||||||
const Shader::RuntimeInfo& runtime_info) {
|
const Shader::RuntimeInfo& runtime_info) {
|
||||||
u32 count = ring_access->Flags<u32>();
|
u32 count = ring_access->Flags<u32>();
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
@ -327,6 +327,21 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter&
|
|||||||
return addr;
|
return addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read a TCS input (InputCP region) or TES input (OutputCP region)
|
||||||
|
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
|
||||||
|
u32 off_dw) {
|
||||||
|
if (off_dw > 0) {
|
||||||
|
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||||
|
}
|
||||||
|
const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
|
||||||
|
const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
|
||||||
|
const IR::U32 attr_index =
|
||||||
|
ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
|
||||||
|
const IR::U32 comp_index =
|
||||||
|
ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
||||||
|
return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||||
@ -391,96 +406,77 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// case IR::Opcode::WriteSharedU128: // TODO
|
|
||||||
case IR::Opcode::WriteSharedU32:
|
case IR::Opcode::WriteSharedU32:
|
||||||
case IR::Opcode::WriteSharedU64: {
|
case IR::Opcode::WriteSharedU64:
|
||||||
// DumpIR(program, "before_walk");
|
case IR::Opcode::WriteSharedU128: {
|
||||||
// RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
|
|
||||||
|
|
||||||
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
|
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
|
||||||
? 1
|
? 1
|
||||||
: (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
|
: (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
|
||||||
const IR::U32 addr{inst.Arg(0)};
|
const IR::U32 addr{inst.Arg(0)};
|
||||||
const IR::U32 data{inst.Arg(1)};
|
const IR::U32 data{inst.Arg(1).Resolve()};
|
||||||
const auto [data_lo, data_hi] = [&] -> std::pair<IR::U32, IR::U32> {
|
|
||||||
if (num_dwords == 1) {
|
|
||||||
return {IR::U32{data}, IR::U32{}};
|
|
||||||
}
|
|
||||||
const auto* prod = data.InstRecursive();
|
|
||||||
return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}};
|
|
||||||
}();
|
|
||||||
|
|
||||||
const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
|
const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
|
||||||
u32 off_dw = 0) {
|
u32 off_dw) {
|
||||||
const IR::F32 data = ir.BitCast<IR::F32, IR::U32>(value);
|
const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);
|
||||||
|
|
||||||
|
if (output_kind == AttributeRegion::OutputCP) {
|
||||||
if (off_dw > 0) {
|
if (off_dw > 0) {
|
||||||
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||||
}
|
}
|
||||||
|
u32 stride = runtime_info.hs_info.hs_output_cp_stride;
|
||||||
if (output_kind == AttributeRegion::OutputCP) {
|
|
||||||
// Invocation ID array index is implicit, handled by SPIRV backend
|
// Invocation ID array index is implicit, handled by SPIRV backend
|
||||||
IR::U32 addr_for_attrs = TryOptimizeAddressModulo(
|
const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
|
||||||
addr, runtime_info.hs_info.hs_output_cp_stride, ir);
|
const IR::U32 attr_index = ir.ShiftRightLogical(
|
||||||
|
ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
|
||||||
IR::U32 attr_index = ir.ShiftRightLogical(
|
const IR::U32 comp_index = ir.ShiftRightLogical(
|
||||||
ir.IMod(addr_for_attrs,
|
|
||||||
ir.Imm32(runtime_info.hs_info.hs_output_cp_stride)),
|
|
||||||
ir.Imm32(4u));
|
|
||||||
IR::U32 comp_index = ir.ShiftRightLogical(
|
|
||||||
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
||||||
ir.SetTcsGenericAttribute(data, attr_index, comp_index);
|
ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(output_kind == AttributeRegion::PatchConst);
|
ASSERT(output_kind == AttributeRegion::PatchConst);
|
||||||
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
|
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
|
||||||
fmt::ptr(addr.Inst()));
|
fmt::ptr(addr.Inst()));
|
||||||
ir.SetPatch(IR::PatchGeneric(addr.U32() >> 2), data);
|
ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
AttributeRegion region = FindRegionKind(&inst, info, runtime_info);
|
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
|
||||||
SetOutput(addr, data, region);
|
if (num_dwords == 1) {
|
||||||
if (num_dwords > 1) {
|
SetOutput(addr, data, region, 0);
|
||||||
// TODO handle WriteSharedU128
|
} else {
|
||||||
SetOutput(addr, data_hi, region, 1);
|
for (auto i = 0; i < num_dwords; i++) {
|
||||||
|
SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
inst.Invalidate();
|
inst.Invalidate();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case IR::Opcode::LoadSharedU32: {
|
case IR::Opcode::LoadSharedU32: {
|
||||||
// case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
// case IR::Opcode::LoadSharedU128:
|
case IR::Opcode::LoadSharedU128:
|
||||||
const IR::U32 addr{inst.Arg(0)};
|
const IR::U32 addr{inst.Arg(0)};
|
||||||
AttributeRegion region = FindRegionKind(&inst, info, runtime_info);
|
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
|
||||||
|
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
|
||||||
ASSERT(region == AttributeRegion::InputCP || region == AttributeRegion::OutputCP);
|
? 1
|
||||||
switch (region) {
|
: (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
|
||||||
case AttributeRegion::InputCP: {
|
ASSERT_MSG(region == AttributeRegion::InputCP,
|
||||||
IR::U32 control_point_index =
|
"Unhandled read of output or patchconst attribute in hull shader");
|
||||||
ir.IDiv(addr, ir.Imm32(runtime_info.hs_info.ls_stride));
|
IR::Value attr_read;
|
||||||
|
if (num_dwords == 1) {
|
||||||
IR::U32 addr_for_attrs =
|
attr_read = ir.BitCast<IR::U32>(
|
||||||
TryOptimizeAddressModulo(addr, runtime_info.hs_info.ls_stride, ir);
|
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
|
||||||
|
} else {
|
||||||
IR::U32 attr_index = ir.ShiftRightLogical(
|
boost::container::static_vector<IR::Value, 4> read_components;
|
||||||
ir.IMod(addr_for_attrs, ir.Imm32(runtime_info.hs_info.ls_stride)),
|
for (auto i = 0; i < num_dwords; i++) {
|
||||||
ir.Imm32(4u));
|
const IR::F32 component =
|
||||||
IR::U32 comp_index = ir.ShiftRightLogical(
|
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
|
||||||
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
read_components.push_back(ir.BitCast<IR::U32>(component));
|
||||||
IR::Value attr_read =
|
}
|
||||||
ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
|
attr_read = ir.CompositeConstruct(read_components);
|
||||||
attr_read = ir.BitCast<IR::U32>(IR::F32{attr_read});
|
}
|
||||||
inst.ReplaceUsesWithAndRemove(attr_read);
|
inst.ReplaceUsesWithAndRemove(attr_read);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AttributeRegion::OutputCP: {
|
|
||||||
UNREACHABLE_MSG("Unhandled output control point read");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -534,46 +530,36 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
const auto opcode = inst.GetOpcode();
|
const auto opcode = inst.GetOpcode();
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::LoadSharedU32: {
|
case IR::Opcode::LoadSharedU32: {
|
||||||
// case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
// case IR::Opcode::LoadSharedU128: // TODO
|
case IR::Opcode::LoadSharedU128:
|
||||||
const IR::U32 addr{inst.Arg(0)};
|
const IR::U32 addr{inst.Arg(0)};
|
||||||
AttributeRegion region = FindRegionKind(&inst, info, runtime_info);
|
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
|
||||||
|
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
|
||||||
ASSERT(region == AttributeRegion::OutputCP ||
|
? 1
|
||||||
region == AttributeRegion::PatchConst);
|
: (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
|
||||||
switch (region) {
|
const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
|
||||||
case AttributeRegion::OutputCP: {
|
if (region == AttributeRegion::OutputCP) {
|
||||||
IR::U32 control_point_index =
|
return ReadTessInputComponent(
|
||||||
ir.IDiv(addr, ir.Imm32(runtime_info.vs_info.hs_output_cp_stride));
|
addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
|
||||||
|
} else {
|
||||||
IR::U32 addr_for_attrs = TryOptimizeAddressModulo(
|
ASSERT(region == AttributeRegion::PatchConst);
|
||||||
addr, runtime_info.vs_info.hs_output_cp_stride, ir);
|
return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
|
||||||
|
}
|
||||||
IR::U32 attr_index = ir.ShiftRightLogical(
|
};
|
||||||
ir.IMod(addr_for_attrs, ir.Imm32(runtime_info.vs_info.hs_output_cp_stride)),
|
IR::Value attr_read;
|
||||||
ir.Imm32(4u));
|
if (num_dwords == 1) {
|
||||||
IR::U32 comp_index = ir.ShiftRightLogical(
|
attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
|
||||||
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
} else {
|
||||||
IR::Value attr_read =
|
boost::container::static_vector<IR::Value, 4> read_components;
|
||||||
ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
|
for (auto i = 0; i < num_dwords; i++) {
|
||||||
attr_read = ir.BitCast<IR::U32>(IR::F32{attr_read});
|
const IR::F32 component = GetInput(addr, i);
|
||||||
|
read_components.push_back(ir.BitCast<IR::U32>(component));
|
||||||
|
}
|
||||||
|
attr_read = ir.CompositeConstruct(read_components);
|
||||||
|
}
|
||||||
inst.ReplaceUsesWithAndRemove(attr_read);
|
inst.ReplaceUsesWithAndRemove(attr_read);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AttributeRegion::PatchConst: {
|
|
||||||
// TODO if assert fails then make generic patch attrs into array and dyn index
|
|
||||||
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
|
|
||||||
fmt::ptr(addr.Inst()));
|
|
||||||
IR::Value get_patch = ir.GetPatch(IR::PatchGeneric(addr.U32() >> 2));
|
|
||||||
inst.ReplaceUsesWithAndRemove(get_patch);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -638,8 +624,8 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
auto sharp_location = FindTessConstantSharp(&inst);
|
auto sharp_location = FindTessConstantSharp(&inst);
|
||||||
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
|
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
|
||||||
sharp_location->dword_off == info.tess_consts_dword_offset) {
|
sharp_location->dword_off == info.tess_consts_dword_offset) {
|
||||||
// Replace the load with a special attribute load (for readability and easier
|
// Replace the load with a special attribute load (for readability and
|
||||||
// pattern matching)
|
// easier pattern matching)
|
||||||
IR::Value index = inst.Arg(1);
|
IR::Value index = inst.Arg(1);
|
||||||
|
|
||||||
ASSERT_MSG(index.IsImmediate(),
|
ASSERT_MSG(index.IsImmediate(),
|
||||||
@ -766,6 +752,7 @@ void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO delete
|
||||||
for (IR::Block* block : program.blocks) {
|
for (IR::Block* block : program.blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
|
Loading…
Reference in New Issue
Block a user