refactor and handle wider DS instructions

This commit is contained in:
Frodo Baggins 2024-12-11 00:26:03 -08:00
parent fffe27a135
commit 634b04c517
3 changed files with 105 additions and 103 deletions

View File

@ -574,6 +574,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
} }
} }
/// Builds a composite value from a runtime-sized list of elements by forwarding to the
/// fixed-arity CompositeConstruct overloads.
/// @param elements The components of the composite; only sizes 2, 3 and 4 are handled.
/// @return The value produced by the matching fixed-arity overload.
/// Any other element count (including 0 and 1) is reported through UNREACHABLE_MSG.
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    switch (elements.size()) {
    case 2:
        return CompositeConstruct(elements[0], elements[1]);
    case 3:
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    case 4:
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    default:
        // This branch is also taken for fewer than 2 elements, so report the real count
        // instead of claiming the span was too large.
        UNREACHABLE_MSG("Composite construct with unsupported element count {} (expected 2 to 4)",
                        elements.size());
    }
}
Value IREmitter::CompositeExtract(const Value& vector, size_t element) { Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
const auto read{[&](Opcode opcode, size_t limit) -> Value { const auto read{[&](Opcode opcode, size_t limit) -> Value {
if (element >= limit) { if (element >= limit) {

View File

@ -148,6 +148,8 @@ public:
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const Value& e4); const Value& e4);
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

View File

@ -257,7 +257,7 @@ private:
enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst }; enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };
static AttributeRegion FindRegionKind(IR::Inst* ring_access, const Shader::Info& info, static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
const Shader::RuntimeInfo& runtime_info) { const Shader::RuntimeInfo& runtime_info) {
u32 count = ring_access->Flags<u32>(); u32 count = ring_access->Flags<u32>();
if (count == 0) { if (count == 0) {
@ -327,6 +327,21 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter&
return addr; return addr;
} }
// Read one dword component of a TCS input (InputCP region) or TES input (OutputCP region).
//
// addr:   address of the access; the index math below treats it as a byte address
//         (bits [3:2] select the component, and the in-stride offset shifted right by 4
//         selects the attribute).
// stride: size of one control point, in the same units as addr.
// ir:     emitter used to insert the replacement address arithmetic and attribute read.
// off_dw: offset of the requested component from addr, counted in dwords.
//
// Returns the value produced by GetTessGenericAttribute for the computed control point,
// attribute and component indices.
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
                                      u32 off_dw) {
    if (off_dw > 0) {
        // off_dw counts dwords while addr is in bytes, so scale the offset by 4 before adding.
        // Adding it unscaled would leave bits [3:2] untouched for off_dw < 4 and make every
        // dword of a wide load resolve to the same component.
        addr = ir.IAdd(addr, ir.Imm32(off_dw * 4u));
    }
    const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
    // NOTE(review): TryOptimizeAddressModulo presumably returns an address equivalent to addr
    // modulo stride; the explicit IMod below is still applied to its result.
    const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
    const IR::U32 attr_index =
        ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
    const IR::U32 comp_index =
        ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
    return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
}
} // namespace } // namespace
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
@ -391,96 +406,77 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
break; break;
} }
// case IR::Opcode::WriteSharedU128: // TODO
case IR::Opcode::WriteSharedU32: case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64: { case IR::Opcode::WriteSharedU64:
// DumpIR(program, "before_walk"); case IR::Opcode::WriteSharedU128: {
// RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
? 1 ? 1
: (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4); : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
const IR::U32 addr{inst.Arg(0)}; const IR::U32 addr{inst.Arg(0)};
const IR::U32 data{inst.Arg(1)}; const IR::U32 data{inst.Arg(1).Resolve()};
const auto [data_lo, data_hi] = [&] -> std::pair<IR::U32, IR::U32> {
if (num_dwords == 1) {
return {IR::U32{data}, IR::U32{}};
}
const auto* prod = data.InstRecursive();
return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}};
}();
const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind, const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
u32 off_dw = 0) { u32 off_dw) {
const IR::F32 data = ir.BitCast<IR::F32, IR::U32>(value); const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);
if (output_kind == AttributeRegion::OutputCP) {
if (off_dw > 0) { if (off_dw > 0) {
addr = ir.IAdd(addr, ir.Imm32(off_dw)); addr = ir.IAdd(addr, ir.Imm32(off_dw));
} }
u32 stride = runtime_info.hs_info.hs_output_cp_stride;
if (output_kind == AttributeRegion::OutputCP) {
// Invocation ID array index is implicit, handled by SPIRV backend // Invocation ID array index is implicit, handled by SPIRV backend
IR::U32 addr_for_attrs = TryOptimizeAddressModulo( const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
addr, runtime_info.hs_info.hs_output_cp_stride, ir); const IR::U32 attr_index = ir.ShiftRightLogical(
ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
IR::U32 attr_index = ir.ShiftRightLogical( const IR::U32 comp_index = ir.ShiftRightLogical(
ir.IMod(addr_for_attrs,
ir.Imm32(runtime_info.hs_info.hs_output_cp_stride)),
ir.Imm32(4u));
IR::U32 comp_index = ir.ShiftRightLogical(
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
ir.SetTcsGenericAttribute(data, attr_index, comp_index); ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
} else { } else {
ASSERT(output_kind == AttributeRegion::PatchConst); ASSERT(output_kind == AttributeRegion::PatchConst);
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}", ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
fmt::ptr(addr.Inst())); fmt::ptr(addr.Inst()));
ir.SetPatch(IR::PatchGeneric(addr.U32() >> 2), data); ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
} }
}; };
AttributeRegion region = FindRegionKind(&inst, info, runtime_info); AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
SetOutput(addr, data, region); if (num_dwords == 1) {
if (num_dwords > 1) { SetOutput(addr, data, region, 0);
// TODO handle WriteSharedU128 } else {
SetOutput(addr, data_hi, region, 1); for (auto i = 0; i < num_dwords; i++) {
SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
}
} }
inst.Invalidate(); inst.Invalidate();
break; break;
} }
case IR::Opcode::LoadSharedU32: { case IR::Opcode::LoadSharedU32: {
// case IR::Opcode::LoadSharedU64: case IR::Opcode::LoadSharedU64:
// case IR::Opcode::LoadSharedU128: case IR::Opcode::LoadSharedU128:
const IR::U32 addr{inst.Arg(0)}; const IR::U32 addr{inst.Arg(0)};
AttributeRegion region = FindRegionKind(&inst, info, runtime_info); AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
ASSERT(region == AttributeRegion::InputCP || region == AttributeRegion::OutputCP); ? 1
switch (region) { : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
case AttributeRegion::InputCP: { ASSERT_MSG(region == AttributeRegion::InputCP,
IR::U32 control_point_index = "Unhandled read of output or patchconst attribute in hull shader");
ir.IDiv(addr, ir.Imm32(runtime_info.hs_info.ls_stride)); IR::Value attr_read;
if (num_dwords == 1) {
IR::U32 addr_for_attrs = attr_read = ir.BitCast<IR::U32>(
TryOptimizeAddressModulo(addr, runtime_info.hs_info.ls_stride, ir); ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
} else {
IR::U32 attr_index = ir.ShiftRightLogical( boost::container::static_vector<IR::Value, 4> read_components;
ir.IMod(addr_for_attrs, ir.Imm32(runtime_info.hs_info.ls_stride)), for (auto i = 0; i < num_dwords; i++) {
ir.Imm32(4u)); const IR::F32 component =
IR::U32 comp_index = ir.ShiftRightLogical( ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); read_components.push_back(ir.BitCast<IR::U32>(component));
IR::Value attr_read = }
ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); attr_read = ir.CompositeConstruct(read_components);
attr_read = ir.BitCast<IR::U32>(IR::F32{attr_read}); }
inst.ReplaceUsesWithAndRemove(attr_read); inst.ReplaceUsesWithAndRemove(attr_read);
break; break;
} }
case AttributeRegion::OutputCP: {
UNREACHABLE_MSG("Unhandled output control point read");
break;
}
default:
break;
}
}
default: default:
break; break;
@ -534,46 +530,36 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
const auto opcode = inst.GetOpcode(); const auto opcode = inst.GetOpcode();
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU32: { case IR::Opcode::LoadSharedU32: {
// case IR::Opcode::LoadSharedU64: case IR::Opcode::LoadSharedU64:
// case IR::Opcode::LoadSharedU128: // TODO case IR::Opcode::LoadSharedU128:
const IR::U32 addr{inst.Arg(0)}; const IR::U32 addr{inst.Arg(0)};
AttributeRegion region = FindRegionKind(&inst, info, runtime_info); AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
ASSERT(region == AttributeRegion::OutputCP || ? 1
region == AttributeRegion::PatchConst); : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
switch (region) { const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
case AttributeRegion::OutputCP: { if (region == AttributeRegion::OutputCP) {
IR::U32 control_point_index = return ReadTessInputComponent(
ir.IDiv(addr, ir.Imm32(runtime_info.vs_info.hs_output_cp_stride)); addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
} else {
IR::U32 addr_for_attrs = TryOptimizeAddressModulo( ASSERT(region == AttributeRegion::PatchConst);
addr, runtime_info.vs_info.hs_output_cp_stride, ir); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
}
IR::U32 attr_index = ir.ShiftRightLogical( };
ir.IMod(addr_for_attrs, ir.Imm32(runtime_info.vs_info.hs_output_cp_stride)), IR::Value attr_read;
ir.Imm32(4u)); if (num_dwords == 1) {
IR::U32 comp_index = ir.ShiftRightLogical( attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); } else {
IR::Value attr_read = boost::container::static_vector<IR::Value, 4> read_components;
ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); for (auto i = 0; i < num_dwords; i++) {
attr_read = ir.BitCast<IR::U32>(IR::F32{attr_read}); const IR::F32 component = GetInput(addr, i);
read_components.push_back(ir.BitCast<IR::U32>(component));
}
attr_read = ir.CompositeConstruct(read_components);
}
inst.ReplaceUsesWithAndRemove(attr_read); inst.ReplaceUsesWithAndRemove(attr_read);
break; break;
} }
case AttributeRegion::PatchConst: {
// TODO if assert fails then make generic patch attrs into array and dyn index
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
fmt::ptr(addr.Inst()));
IR::Value get_patch = ir.GetPatch(IR::PatchGeneric(addr.U32() >> 2));
inst.ReplaceUsesWithAndRemove(get_patch);
break;
}
default:
break;
}
break;
}
default: default:
break; break;
} }
@ -638,8 +624,8 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
auto sharp_location = FindTessConstantSharp(&inst); auto sharp_location = FindTessConstantSharp(&inst);
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
sharp_location->dword_off == info.tess_consts_dword_offset) { sharp_location->dword_off == info.tess_consts_dword_offset) {
// Replace the load with a special attribute load (for readability and easier // Replace the load with a special attribute load (for readability and
// pattern matching) // easier pattern matching)
IR::Value index = inst.Arg(1); IR::Value index = inst.Arg(1);
ASSERT_MSG(index.IsImmediate(), ASSERT_MSG(index.IsImmediate(),
@ -766,6 +752,7 @@ void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info) {
} }
} }
// TODO delete
for (IR::Block* block : program.blocks) { for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {