Preparations for implementing SPV DMA access

This commit is contained in:
Lander Gallastegi 2025-04-19 15:13:14 +02:00
parent c077fb97da
commit 9356779bb3
9 changed files with 127 additions and 62 deletions

View File

@ -298,6 +298,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
if (info.dma_types != IR::Type::Void) {
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
ctx.AddExtension("SPV_KHR_physical_storage_buffer");
}
}
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {

View File

@ -60,7 +60,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
@ -257,7 +257,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@ -265,7 +265,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);

View File

@ -160,14 +160,14 @@ void EmitGetGotoVariable(EmitContext&) {
UNREACHABLE_MSG("Unreachable instruction");
}
using BufferAlias = EmitContext::BufferAlias;
using PointerType = EmitContext::PointerType;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
const Id ptr{
ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
return ctx.OpLoad(ctx.U32[1], ptr);
@ -176,7 +176,7 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[BufferAlias::U32];
const auto [id, pointer_type] = buffer[PointerType::U32];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
@ -430,7 +430,7 @@ static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size,
return result;
}
template <u32 N, BufferAlias alias>
template <u32 N, PointerType alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
@ -438,7 +438,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
boost::container::static_vector<Id, N> ids;
@ -449,7 +449,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (!flags.typed) {
// Untyped loads have bounds checking per-component.
ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
result_i, alias == BufferAlias::F32));
result_i, alias == PointerType::F32));
} else {
ids.push_back(result_i);
}
@ -459,7 +459,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (flags.typed) {
// Typed loads have single bounds check for the whole load.
return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
alias == BufferAlias::F32);
alias == PointerType::F32);
}
return result;
}
@ -469,7 +469,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
@ -480,7 +480,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
@ -488,35 +488,35 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
return EmitLoadBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -546,7 +546,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
emit_func();
}
template <u32 N, BufferAlias alias>
template <u32 N, PointerType alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
@ -555,7 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
auto store = [&] {
@ -586,7 +586,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U8, value)};
EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
@ -597,7 +597,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U16, value)};
@ -606,35 +606,35 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
}
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
EmitStoreBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View File

@ -70,6 +70,12 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
if (info.dma_types != IR::Type::Void) {
SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
} else {
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
}
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@ -157,6 +163,30 @@ void EmitContext::DefineArithmeticTypes() {
if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
}
if (True(info.dma_types & IR::Type::F64)) {
physical_pointer_types[PointerType::F64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
}
if (True(info.dma_types & IR::Type::U64)) {
physical_pointer_types[PointerType::U64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
}
if (True(info.dma_types & IR::Type::F32)) {
physical_pointer_types[PointerType::F32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
}
if (True(info.dma_types & IR::Type::U32)) {
physical_pointer_types[PointerType::U32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
}
if (True(info.dma_types & IR::Type::F16)) {
physical_pointer_types[PointerType::F16] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
}
if (True(info.dma_types & IR::Type::U16)) {
physical_pointer_types[PointerType::U32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
}
// We allways want U8 if using DMA, for the fault readback buffer
if (info.dma_types != IR::Type::Void) {
physical_pointer_types[PointerType::U32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
}
}
void EmitContext::DefineInterfaces() {
@ -198,7 +228,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
// Can this be done with memory access? Like we do now with ReadConst
const auto& srt_flatbuf = buffers[flatbuf_index];
ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@ -736,20 +766,24 @@ void EmitContext::DefineBuffers() {
// Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
if (True(desc.used_types & IR::Type::U64)) {
spv_buffer[PointerType::U64] =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U64);
}
if (True(desc.used_types & IR::Type::U32)) {
spv_buffer[BufferAlias::U32] =
spv_buffer[PointerType::U32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
}
if (True(desc.used_types & IR::Type::F32)) {
spv_buffer[BufferAlias::F32] =
spv_buffer[PointerType::F32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
}
if (True(desc.used_types & IR::Type::U16)) {
spv_buffer[BufferAlias::U16] =
spv_buffer[PointerType::U16] =
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
}
if (True(desc.used_types & IR::Type::U8)) {
spv_buffer[BufferAlias::U8] =
spv_buffer[PointerType::U8] =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
}
++binding.unified;

View File

@ -133,12 +133,24 @@ public:
return ConstantComposite(type, constituents);
}
inline Id OpLabel(std::string_view label_name) {
last_label = Module::OpLabel(label_name);
return last_label;
}
inline Id OpLabel() {
last_label = Module::OpLabel();
return last_label;
}
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage;
LogicalStage l_stage{};
Id last_label{};
Id void_id{};
Id U8{};
Id S8{};
@ -231,11 +243,14 @@ public:
bool is_storage = false;
};
enum class BufferAlias : u32 {
enum class PointerType : u32 {
U8,
U16,
F16,
U32,
F32,
U64,
F64,
NumAlias,
};
@ -252,22 +267,35 @@ public:
Id size;
Id size_shorts;
Id size_dwords;
std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](BufferAlias alias) const {
const BufferSpv& operator[](PointerType alias) const {
return aliases[u32(alias)];
}
BufferSpv& operator[](BufferAlias alias) {
BufferSpv& operator[](PointerType alias) {
return aliases[u32(alias)];
}
};
struct PhysicalPointerTypes {
std::array<Id, u32(PointerType::NumAlias)> types;
const Id& operator[](PointerType type) const {
return types[u32(type)];
}
Id& operator[](PointerType type) {
return types[u32(type)];
}
};
Bindings& binding;
boost::container::small_vector<Id, 16> buf_type_ids;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers;
PhysicalPointerTypes physical_pointer_types;
size_t flatbuf_index{};
size_t bda_pagetable_index{};

View File

@ -211,7 +211,7 @@ struct Info {
bool stores_tess_level_inner{};
bool translation_failed{};
bool has_readconst{};
bool uses_dma{};
IR::Type dma_types{IR::Type::Void};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};

View File

@ -87,21 +87,7 @@ void Visit(Info& info, const IR::Inst& inst) {
});
info.has_readconst = true;
}
if (!info.uses_dma) {
// For now, we only need U32, but we may need
// to add more types in the future for other porposes.
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::BdaPagetable,
});
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::FaultReadback,
});
info.uses_dma = true;
}
info.dma_types |= IR::Type::U32;
break;
case IR::Opcode::PackUfloat10_11_11:
info.uses_pack_10_11_11 = true;
@ -120,6 +106,19 @@ void CollectShaderInfoPass(IR::Program& program) {
Visit(program.info, inst);
}
}
if (program.info.dma_types != IR::Type::Void) {
program.info.buffers.push_back({
.used_types = IR::Type::U64,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::BdaPagetable,
});
program.info.buffers.push_back({
.used_types = IR::Type::U8,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::FaultReadback,
});
}
}
} // namespace Shader::Optimization

View File

@ -372,10 +372,10 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
bda_addrs.clear();
bda_addrs.reserve(range_pages);
for (u64 i = 0; i < range_pages; ++i) {
// Mark the page as host imported to let the shader know
// Don't mark the page as GPU local to let the shader know
// so that it can notify us if it accesses the page, so we can
// create a GPU local buffer.
bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1);
bda_addrs.push_back(bda_addr + (i << CACHING_PAGEBITS));
}
WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress), bda_addrs.data(),
bda_addrs.size() * sizeof(vk::DeviceAddress));
@ -531,8 +531,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
const u64 size_pages = size >> CACHING_PAGEBITS;
bda_addrs.reserve(size_pages);
for (u64 i = 0; i < size_pages; ++i) {
// Here, we do not set the host imported bit.
bda_addrs.push_back(new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS));
// Here, we mark the page as backed by a GPU local buffer
bda_addrs.push_back((new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS)) | 0x1);
}
WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
bda_addrs.size() * sizeof(vk::DeviceAddress));

View File

@ -460,7 +460,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
BindBuffers(*stage, binding, push_data);
BindTextures(*stage, binding);
dma_enabled |= stage->uses_dma;
dma_enabled |= stage->dma_types != Shader::IR::Type::Void;
}
pipeline->BindResources(set_writes, buffer_barriers, push_data);