shader_recompiler: Simplify dma types

Only the U32 type is needed, since DMA reads are only generated for S_LOAD_DWORD.
IndecisiveTurtle 2025-06-25 22:38:17 +03:00
parent 6eaec7a004
commit a0c1542691
6 changed files with 23 additions and 84 deletions
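In short: `Info::dma_types` was an `IR::Type` bitfield recording which scalar types DMA reads might touch, and `EmitContext` kept a matching table of PhysicalStorageBuffer pointer types. Since the only instruction lowered through this path is S_LOAD_DWORD (the `ReadConst` IR op), which always reads 32-bit dwords, the bitfield collapses to a single `bool uses_dma` plus one U32 pointer type. The standalone sketch below (illustrative struct names and enum values, not shadPS4 code) shows why the two encodings are equivalent for the one question the backend asks:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the real IR::Type bitfield and Info struct.
enum class Type : std::uint32_t { Void = 0, U32 = 1u << 5 };

struct InfoBefore {
    Type dma_types{Type::Void}; // which scalar types DMA reads touch
};

struct InfoAfter {
    bool uses_dma{false}; // only U32 was ever possible, so a flag suffices
};

// The collector's ReadConst case was the sole writer of dma_types,
// and it only ever ORed in Type::U32.
void OnReadConst(InfoBefore& info) {
    info.dma_types = static_cast<Type>(static_cast<std::uint32_t>(info.dma_types) |
                                       static_cast<std::uint32_t>(Type::U32));
}

void OnReadConst(InfoAfter& info) {
    info.uses_dma = true;
}

int main() {
    InfoBefore before;
    InfoAfter after;
    OnReadConst(before);
    OnReadConst(after);
    // Both encodings answer the one question the SPIR-V backend asks:
    // does this shader need PhysicalStorageBuffer addressing?
    assert((before.dma_types != Type::Void) == after.uses_dma);
    return 0;
}
```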

View File

@@ -300,7 +300,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
     if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
         ctx.AddCapability(spv::Capability::Tessellation);
     }
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
         ctx.AddExtension("SPV_KHR_physical_storage_buffer");
     }

View File

@@ -71,7 +71,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
                          Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
     } else {
         SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
@@ -169,34 +169,8 @@ void EmitContext::DefineArithmeticTypes() {
     if (info.uses_fp64) {
         frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
     }
-    if (True(info.dma_types & IR::Type::F64)) {
-        physical_pointer_types[PointerType::F64] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
-    }
-    if (True(info.dma_types & IR::Type::U64)) {
-        physical_pointer_types[PointerType::U64] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
-    }
-    if (True(info.dma_types & IR::Type::F32)) {
-        physical_pointer_types[PointerType::F32] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
-    }
-    if (True(info.dma_types & IR::Type::U32)) {
-        physical_pointer_types[PointerType::U32] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
-    }
-    if (True(info.dma_types & IR::Type::F16)) {
-        physical_pointer_types[PointerType::F16] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
-    }
-    if (True(info.dma_types & IR::Type::U16)) {
-        physical_pointer_types[PointerType::U16] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
-    }
-    if (True(info.dma_types & IR::Type::U8)) {
-        physical_pointer_types[PointerType::U8] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+    if (info.uses_dma) {
+        physical_pointer_type_u32 = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
     }
 }
@@ -272,8 +246,7 @@ void EmitContext::DefineBufferProperties() {
         buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
         Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
-        // Only need to load size if performing bounds checks and the buffer is both guest and not
-        // inline.
+        // Only load size if performing bounds checks and the buffer is both guest and not inline.
         if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
             const BufferResource& desc = info.buffers[i];
             if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
@@ -1211,7 +1184,7 @@ Id EmitContext::DefineReadConst(bool dynamic) {
         const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
         const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
-        const auto result = EmitMemoryRead(U32[1], addr, [&]() {
+        const auto result = EmitDwordMemoryRead(addr, [&]() {
             if (dynamic) {
                 return u32_zero_value;
             } else {
@@ -1239,7 +1212,7 @@ void EmitContext::DefineFunctions() {
         uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
         uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
     }
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         get_bda_pointer = DefineGetBdaPointer();
     }

View File

@@ -155,25 +155,7 @@ public:
         return last_label;
     }
-    PointerType PointerTypeFromType(Id type) {
-        if (type.value == U8.value)
-            return PointerType::U8;
-        if (type.value == U16.value)
-            return PointerType::U16;
-        if (type.value == F16[1].value)
-            return PointerType::F16;
-        if (type.value == U32[1].value)
-            return PointerType::U32;
-        if (type.value == F32[1].value)
-            return PointerType::F32;
-        if (type.value == U64.value)
-            return PointerType::U64;
-        if (type.value == F64[1].value)
-            return PointerType::F64;
-        UNREACHABLE_MSG("Unknown type for pointer");
-    }
-    Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
+    Id EmitDwordMemoryRead(Id address, auto&& fallback) {
         const Id available_label = OpLabel();
         const Id fallback_label = OpLabel();
         const Id merge_label = OpLabel();
@@ -185,10 +167,8 @@
         // Available
         AddLabel(available_label);
-        const auto pointer_type = PointerTypeFromType(type);
-        const Id pointer_type_id = physical_pointer_types[pointer_type];
-        const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
-        const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
+        const Id addr_ptr = OpConvertUToPtr(physical_pointer_type_u32, addr);
+        const Id result = OpLoad(U32[1], addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
         OpBranch(merge_label);

         // Fallback
@@ -199,7 +179,7 @@
         // Merge
         AddLabel(merge_label);
         const Id final_result =
-            OpPhi(type, fallback_result, fallback_label, result, available_label);
+            OpPhi(U32[1], fallback_result, fallback_label, result, available_label);
         return final_result;
     }
@@ -339,29 +319,17 @@
         }
     };

-    struct PhysicalPointerTypes {
-        std::array<Id, u32(PointerType::NumAlias)> types;
-
-        const Id& operator[](PointerType type) const {
-            return types[u32(type)];
-        }
-
-        Id& operator[](PointerType type) {
-            return types[u32(type)];
-        }
-    };
-
     Bindings& binding;
     boost::container::small_vector<Id, 16> buf_type_ids;
     boost::container::small_vector<BufferDefinition, 16> buffers;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
-    PhysicalPointerTypes physical_pointer_types;
     std::unordered_map<u32, Id> first_to_last_label_map;

     size_t flatbuf_index{};
     size_t bda_pagetable_index{};
     size_t fault_buffer_index{};
+    Id physical_pointer_type_u32;

     Id sampler_type{};
     Id sampler_pointer_type{};

View File

@@ -238,7 +238,7 @@ struct Info {
         Dynamic = 1 << 1,
     };
     ReadConstType readconst_types{};
-    IR::Type dma_types{IR::Type::Void};
+    bool uses_dma{false};

     explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
         : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},

View File

@@ -102,7 +102,7 @@ void Visit(Info& info, const IR::Inst& inst) {
         info.uses_lane_id = true;
         break;
     case IR::Opcode::ReadConst:
-        if (info.readconst_types == Info::ReadConstType::None) {
+        if (!info.uses_dma) {
             info.buffers.push_back({
                 .used_types = IR::Type::U32,
                 // We can't guarantee that flatbuf will not grow past UBO
@@ -116,7 +116,7 @@ void Visit(Info& info, const IR::Inst& inst) {
         } else {
             info.readconst_types |= Info::ReadConstType::Dynamic;
         }
-        info.dma_types |= IR::Type::U32;
+        info.uses_dma = true;
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;
@@ -130,20 +130,21 @@
 }

 void CollectShaderInfoPass(IR::Program& program) {
+    auto& info = program.info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            Visit(program.info, inst);
+            Visit(info, inst);
         }
     }
-    if (program.info.dma_types != IR::Type::Void) {
-        program.info.buffers.push_back({
+    if (info.uses_dma) {
+        info.buffers.push_back({
             .used_types = IR::Type::U64,
             .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE),
             .buffer_type = BufferType::BdaPagetable,
             .is_written = true,
         });
-        program.info.buffers.push_back({
+        info.buffers.push_back({
             .used_types = IR::Type::U8,
             .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
             .buffer_type = BufferType::FaultBuffer,

View File

@@ -468,17 +468,12 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         stage->PushUd(binding, push_data);
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
-        uses_dma |= stage->dma_types != Shader::IR::Type::Void;
+        uses_dma |= stage->uses_dma;
     }

-    pipeline->BindResources(set_writes, buffer_barriers, push_data);
-
     if (uses_dma && !fault_process_pending) {
         // We only use fault buffer for DMA right now.
         {
-            // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)
-            // we need to account for that and synchronize.
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
             for (auto& range : mapped_ranges) {
                 buffer_cache.SynchronizeBuffersInRange(range.lower(),
@@ -490,6 +485,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
     fault_process_pending |= uses_dma;
+
+    pipeline->BindResources(set_writes, buffer_barriers, push_data);
     return true;
 }
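For context, after this change `EmitDwordMemoryRead` always emits the same shape: convert the 64-bit address to the single PhysicalStorageBuffer U32 pointer type, perform one aligned dword load on the available path, invoke the fallback on the other, and merge the two values with `OpPhi`. A rough host-side C++ model of that control flow (illustrative only, not the emitted SPIR-V; `available` stands for the branch taken when `GetBdaPointer` resolves the address):

```cpp
#include <cstdint>

// Models the three-label structure EmitDwordMemoryRead builds:
// Available -> load, Fallback -> callback, Merge -> OpPhi.
std::uint32_t DwordMemoryReadModel(std::uint64_t addr, bool available,
                                   std::uint32_t (*fallback)()) {
    std::uint32_t result;
    if (available) {
        // OpConvertUToPtr(physical_pointer_type_u32) + OpLoad(U32, Aligned, 4)
        result = *reinterpret_cast<const std::uint32_t*>(
            static_cast<std::uintptr_t>(addr));
    } else {
        // e.g. u32_zero_value for a dynamic ReadConst
        result = fallback();
    }
    // The merge label's OpPhi selects whichever value was produced.
    return result;
}
```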