Allow vector and scalar offset in buffer address arg to LoadBuffer/StoreBuffer (#3439)

* Allow vector and scalar offset in buffer address arg to
LoadBuffer/StoreBuffer

* remove is_ring check

* fix atomics and update pattern matching for tess factor stores

* remove old asserts about soffset

* small fixes

* copyright

* Handle SGPR initialization for two special hull shader values, including the tessellation factor buffer offset
This commit is contained in:
baggins183
2025-09-03 20:54:23 -07:00
committed by GitHub
parent 59eea3b49e
commit df52585086
14 changed files with 135 additions and 87 deletions

View File

@@ -179,6 +179,14 @@ void Translator::EmitPrologue(IR::Block* first_block) {
// [8:12]: output control point id
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
if (runtime_info.hs_info.offchip_lds_enable) {
// No off-chip tessellation has been observed yet. If this survives dead code elimination,
// revisit it.
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::OffChipLdsBase));
}
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::TessFactorsBufferBase));
break;
}
case LogicalStage::TessellationEval:

View File

@@ -202,39 +202,18 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
const GcnInst& inst, u32 scalar_width, bool is_signed) {
const auto& mubuf = inst.control.mubuf;
const bool is_ring = mubuf.glc && mubuf.slc && info.l_stage != LogicalStage::Vertex &&
info.l_stage != LogicalStage::Fragment;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
const bool has_soffset = !soffset.IsImmediate() || soffset.U32() != 0;
if (info.stage != Stage::Geometry) {
ASSERT_MSG(!has_soffset || !mubuf.offen,
"Having both scalar and vector offsets is not supported");
}
const IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen && has_soffset) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
if (has_soffset) {
return soffset;
}
return {};
}();
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
const IR::U32 soffset{GetSrc(inst.src[3])};
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mubuf.idxen);
buffer_info.offset_enable.Assign(mubuf.offen || has_soffset);
buffer_info.voffset_enable.Assign(mubuf.offen);
buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc);
@@ -290,35 +269,18 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
void Translator::BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
const GcnInst& inst, u32 scalar_width) {
const auto& mubuf = inst.control.mubuf;
const bool is_ring =
mubuf.glc && mubuf.slc && info.l_stage != LogicalStage::Fragment &&
info.stage !=
Stage::Vertex; // VS passes attributes down with EXPORT, VS HW stage is always present
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
}
IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
}
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
return {};
}();
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
const IR::U32 soffset{GetSrc(inst.src[3])};
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mubuf.idxen);
buffer_info.offset_enable.Assign(mubuf.offen);
buffer_info.voffset_enable.Assign(mubuf.offen);
buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc);
@@ -377,21 +339,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const IR::VectorReg vaddr{inst.src[0].code};
const IR::VectorReg vdata{inst.src[1].code};
const IR::ScalarReg srsrc{inst.src[2].code * 4};
const IR::Value address = [&] -> IR::Value {
if (mubuf.idxen && mubuf.offen) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
}
if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
return {};
}();
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
const IR::U32 soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
IR::BufferInstInfo buffer_info{};
buffer_info.index_enable.Assign(mubuf.idxen);
buffer_info.offset_enable.Assign(mubuf.offen);
buffer_info.voffset_enable.Assign(mubuf.offen);
buffer_info.inst_offset.Assign(mubuf.offset);
buffer_info.globally_coherent.Assign(mubuf.glc);
buffer_info.system_coherent.Assign(mubuf.slc);