Base DMA implementation

Lander Gallastegi 2025-04-19 00:17:16 +02:00
parent 94a078207f
commit c077fb97da
10 changed files with 82 additions and 17 deletions
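In outline, the change teaches shaders to read guest memory through DMA: every DMA-capable shader gets two extra bindings, a BDA page table (an array of vk::DeviceAddress entries written by BufferCache::ImportMemory) and a fault readback buffer that records pages a shader touched but that were not yet resident, so the rasterizer can cache them after the draw (CreateFaultBuffers). The snippet below is only a host-side model of the translation such a page table enables; the page granularity and the "0 means unmapped" convention are assumptions for illustration, not taken from this diff.

```cpp
// Host-side model of the BDA page-table lookup (sketch only). The real table is a GPU
// buffer of vk::DeviceAddress entries filled by BufferCache::ImportMemory; PAGE_BITS
// and the zero-means-unmapped encoding here are assumptions for illustration.
#include <cstdint>
#include <vector>

constexpr std::uint64_t PAGE_BITS = 12; // assumed page granularity (4 KiB)
constexpr std::uint64_t PAGE_MASK = (1ULL << PAGE_BITS) - 1;

struct BdaPagetableModel {
    std::vector<std::uint64_t> entries; // device address of each guest page, 0 if unmapped

    // Translate a guest virtual address; report a fault when the page has no backing yet.
    std::uint64_t Translate(std::uint64_t guest_addr, bool& faulted) const {
        const std::uint64_t page = guest_addr >> PAGE_BITS;
        const std::uint64_t entry = page < entries.size() ? entries[page] : 0;
        faulted = (entry == 0);
        return faulted ? 0 : entry + (guest_addr & PAGE_MASK);
    }
};
```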


@@ -164,7 +164,7 @@ using BufferAlias = EmitContext::BufferAlias;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
-    const auto& srt_flatbuf = ctx.buffers.back();
+    const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
     ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
            srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];


@@ -195,7 +195,8 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 }
 
 Id EmitContext::GetBufferSize(const u32 sharp_idx) {
-    const auto& srt_flatbuf = buffers.back();
+    // Can this be done with memory access? Like we do now with ReadConst
+    const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
@@ -693,6 +694,12 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
     case Shader::BufferType::ReadConstUbo:
         Name(id, "srt_flatbuf_ubo");
         break;
+    case Shader::BufferType::BdaPagetable:
+        Name(id, "bda_pagetable");
+        break;
+    case Shader::BufferType::FaultReadback:
+        Name(id, "fault_readback");
+        break;
     case Shader::BufferType::SharedMemory:
         Name(id, "ssbo_shmem");
         break;
@@ -718,6 +725,15 @@ void EmitContext::DefineBuffers() {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
+        // Set indexes for special buffers.
+        if (desc.buffer_type == BufferType::ReadConstUbo) {
+            flatbuf_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::BdaPagetable) {
+            bda_pagetable_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::FaultReadback) {
+            fault_readback_index = buffers.size();
+        }
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
         if (True(desc.used_types & IR::Type::U32)) {


@@ -269,6 +269,10 @@ public:
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
+    size_t flatbuf_index{};
+    size_t bda_pagetable_index{};
+    size_t fault_readback_index{};
     Id sampler_type{};
     Id sampler_pointer_type{};


@@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
 void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    const u32 dword_offset = [&] -> u32 {
+    const IR::ScalarReg sbase{inst.src[0].code * 2};
+    const IR::U32 dword_offset = [&] -> IR::U32 {
         if (smrd.imm) {
-            return smrd.offset;
+            return ir.Imm32(smrd.offset);
         }
         if (smrd.offset == SQ_SRC_LITERAL) {
-            return inst.src[1].code;
+            return ir.Imm32(inst.src[1].code);
         }
-        UNREACHABLE();
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
     }();
-    const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
         ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+        IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
+        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index));
     }
 }
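
The new fallthrough in the dword_offset lambda is the DMA-relevant part: when smrd.offset is neither an immediate nor a literal, it names an SGPR holding a byte offset, so the translator shifts it right by two to get a dword offset and each destination dword's index is computed at runtime with IAdd instead of being folded into an immediate. A plain C++ stand-in for that arithmetic (the input values are illustrative):

```cpp
// Stand-in for the offset arithmetic the emitted IR performs at runtime.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t sgpr_byte_offset = 0x48; // byte offset held in the offset SGPR
    const std::uint32_t num_dwords = 4;          // e.g. S_LOAD_DWORDX4
    const std::uint32_t dword_offset = sgpr_byte_offset >> 2; // ShiftRightLogical(..., 2)
    for (std::uint32_t i = 0; i < num_dwords; ++i) {
        const std::uint32_t index = dword_offset + i; // IAdd(dword_offset, Imm32(i))
        std::printf("dst[%u] = ReadConst(base, %u)\n", i, index);
    }
}
```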


@@ -42,6 +42,8 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
 enum class BufferType : u32 {
     Guest,
     ReadConstUbo,
+    BdaPagetable,
+    FaultReadback,
     GdsBuffer,
     SharedMemory,
 };
@@ -209,6 +211,7 @@ struct Info {
     bool stores_tess_level_inner{};
     bool translation_failed{};
     bool has_readconst{};
+    bool uses_dma{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};


@@ -87,6 +87,21 @@ void Visit(Info& info, const IR::Inst& inst) {
             });
             info.has_readconst = true;
         }
+        if (!info.uses_dma) {
+            // For now, we only need U32, but we may need
+            // to add more types in the future for other purposes.
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::BdaPagetable,
+            });
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::FaultReadback,
+            });
+            info.uses_dma = true;
+        }
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;


@@ -379,10 +379,7 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
         }
         WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress), bda_addrs.data(),
                         bda_addrs.size() * sizeof(vk::DeviceAddress));
-        {
-            std::scoped_lock lk{mutex};
-            imported_buffers.emplace_back(std::move(buffer));
-        }
+        imported_buffers.emplace_back(std::move(buffer));
         // Mark the pages as covered
         imported_regions += range;
     }
@@ -850,7 +847,7 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     vk::Buffer src_buffer = staging_buffer.Handle();
     if (num_bytes < StagingBufferSize) {
         const auto [staging, offset] = staging_buffer.Map(num_bytes);
-        std::memcpy(staging + offset, value, num_bytes);
+        std::memcpy(staging, value, num_bytes);
         copy.srcOffset = offset;
         staging_buffer.Commit();
     } else {
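
The memcpy change above is subtle: the staging buffer's Map(num_bytes) appears to return a write pointer that is already advanced to the reserved region, while the separate offset is only meant for copy.srcOffset, so copying to staging + offset would apply the offset twice. A minimal model of that contract (StagingModel below is illustrative, not the project's staging buffer API):

```cpp
// Minimal model of the Map()/Commit() contract the fix relies on (illustrative types).
#include <cstddef>
#include <cstring>
#include <utility>
#include <vector>

struct StagingModel {
    std::vector<std::byte> memory = std::vector<std::byte>(1024);
    std::size_t cursor = 0;

    // Reserve num_bytes; return {write pointer into the region, offset of the region}.
    std::pair<std::byte*, std::size_t> Map(std::size_t num_bytes) {
        const std::size_t offset = cursor;
        cursor += num_bytes;
        return {memory.data() + offset, offset}; // pointer already includes the offset
    }
    void Commit() {} // the real buffer would flush/advance here
};

int main() {
    StagingModel staging;
    const char value[] = "payload";
    const auto [ptr, offset] = staging.Map(sizeof(value));
    std::memcpy(ptr, value, sizeof(value)); // write through ptr, not ptr + offset
    staging.Commit();
    (void)offset; // offset is what feeds copy.srcOffset for the GPU-side copy
}
```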


@@ -79,13 +79,13 @@ public:
     }
 
     /// Retrieves the device local BDA page table buffer.
-    [[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
-        return bda_pagetable_buffer;
+    [[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
+        return &bda_pagetable_buffer;
     }
 
     /// Retrieves the fault readback buffer.
-    [[nodiscard]] Buffer& GetFaultReadbackBuffer() noexcept {
-        return fault_readback_buffer;
+    [[nodiscard]] Buffer* GetFaultReadbackBuffer() noexcept {
+        return &fault_readback_buffer;
     }
 
     /// Retrieves the buffer with the specified id.


@@ -459,9 +459,24 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         stage->PushUd(binding, push_data);
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
+        dma_enabled |= stage->uses_dma;
     }
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
 
+    if (dma_enabled) {
+        // First, import any queued host memory, then sync every mapped
+        // region that is cached on GPU memory.
+        buffer_cache.ImportQueuedRegions();
+        {
+            std::shared_lock lock(mapped_ranges_mutex);
+            for (const auto& range : mapped_ranges) {
+                buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
+            }
+        }
+    }
+
     return true;
 }
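
When any bound stage uses DMA, the rasterizer imports whatever host memory is queued for import and then walks every currently mapped range, taking mapped_ranges_mutex shared because the loop only reads the range set. A standalone sketch of that iteration pattern over a boost::icl::interval_set (SyncRange stands in for buffer_cache.SynchronizeRange; the surrounding types are illustrative):

```cpp
// Standalone sketch of the "sync every mapped range" loop: iterate an interval set of
// guest address ranges under a shared lock and hand each [lower, size) span to a sync
// callback.
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <shared_mutex>

using VAddr = std::uint64_t;

int main() {
    boost::icl::interval_set<VAddr> mapped_ranges;
    std::shared_mutex mapped_ranges_mutex;
    mapped_ranges += boost::icl::interval<VAddr>::right_open(0x1000, 0x3000);
    mapped_ranges += boost::icl::interval<VAddr>::right_open(0x8000, 0x9000);

    const auto SyncRange = [](VAddr addr, VAddr size) {
        std::printf("sync 0x%llx + 0x%llx\n", static_cast<unsigned long long>(addr),
                    static_cast<unsigned long long>(size));
    };

    std::shared_lock lock(mapped_ranges_mutex); // readers only need shared ownership
    for (const auto& range : mapped_ranges) {
        SyncRange(range.lower(), range.upper() - range.lower());
    }
}
```
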
@@ -526,6 +541,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
                                               instance.UniformMinAlignment());
             buffer_infos.emplace_back(vk_buffer.Handle(), offset, ubo_size);
+        } else if (desc.buffer_type == Shader::BufferType::BdaPagetable) {
+            const auto* bda_buffer = buffer_cache.GetBdaPageTableBuffer();
+            buffer_infos.emplace_back(bda_buffer->Handle(), 0, bda_buffer->SizeBytes());
+        } else if (desc.buffer_type == Shader::BufferType::FaultReadback) {
+            const auto* fault_buffer = buffer_cache.GetFaultReadbackBuffer();
+            buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
         } else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
            auto& lds_buffer = buffer_cache.GetStreamBuffer();
            const auto& cs_program = liverpool->GetCsRegs();


@@ -95,6 +95,13 @@ private:
             texture_cache.GetImage(image_id).binding.Reset();
         }
         bound_images.clear();
+
+        if (dma_enabled) {
+            dma_enabled = false;
+            // If a shader accesses a buffer that is not cached, we need to
+            // cache it.
+            buffer_cache.CreateFaultBuffers();
+        }
     }
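
The dma_enabled flag set in BindResources is consumed here once the bindings are torn down: it is cleared, and CreateFaultBuffers turns whatever the fault readback buffer recorded into cached buffers for the next submission. That function is not part of this diff, so the sketch below is only a guess at the general shape of such a pass (a CPU scan of a bitmap of faulted pages); the helper and the encoding are hypothetical.

```cpp
// Hypothetical outline of a fault-driven caching pass; not the project's CreateFaultBuffers.
#include <bit>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: make the guest page with this index resident in the buffer cache.
void CacheGuestPage(std::size_t page_index) {
    std::printf("cache page %zu\n", page_index);
}

// Scan a readback bitmap where each set bit marks a page a shader touched but missed.
void ProcessFaults(const std::vector<std::uint64_t>& fault_words) {
    for (std::size_t word = 0; word < fault_words.size(); ++word) {
        std::uint64_t bits = fault_words[word];
        while (bits != 0) {
            const int bit = std::countr_zero(bits); // lowest pending fault
            CacheGuestPage(word * 64 + static_cast<std::size_t>(bit));
            bits &= bits - 1;                       // clear it and continue
        }
    }
}

int main() {
    ProcessFaults({0b1010, 0});
}
```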
bool IsComputeMetaClear(const Pipeline* pipeline);
@@ -126,6 +133,7 @@ private:
     boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
     using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
     boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings;
+    bool dma_enabled{false};
 };
} // namespace Vulkan