mirror of https://github.com/shadps4-emu/shadPS4.git

commit 9356779bb3 (parent c077fb97da)
Preparations for implementing SPV DMA access
@@ -298,6 +298,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
         ctx.AddCapability(spv::Capability::Tessellation);
     }
+    if (info.dma_types != IR::Type::Void) {
+        ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
+        ctx.AddExtension("SPV_KHR_physical_storage_buffer");
+    }
 }
 
 void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
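This capability/extension pair is only half of the setup: the module must also switch to the PhysicalStorageBuffer64 addressing model (see the EmitContext constructor hunk below), and on the API side the device needs bufferDeviceAddress enabled so buffer addresses can be queried at all. The device side is outside this diff; the following is a generic Vulkan sketch, with QueryBufferAddress a hypothetical name:

#include <vulkan/vulkan.h>

// Hypothetical helper (not from this commit). The buffer must have been created
// with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, and the device created with
// VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress = VK_TRUE.
VkDeviceAddress QueryBufferAddress(VkDevice device, VkBuffer buffer) {
    VkBufferDeviceAddressInfo info{
        .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
        .buffer = buffer,
    };
    return vkGetBufferDeviceAddress(device, &info);
}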
@@ -60,7 +60,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
         address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
@@ -257,7 +257,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
 
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@@ -265,7 +265,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
 
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
@@ -160,14 +160,14 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
-using BufferAlias = EmitContext::BufferAlias;
+using PointerType = EmitContext::PointerType;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
     const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
     ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
            srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
     const Id ptr{
         ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
     return ctx.OpLoad(ctx.U32[1], ptr);
@@ -176,7 +176,7 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     const auto& buffer = ctx.buffers[handle];
     index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[PointerType::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
 
@@ -430,7 +430,7 @@ static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size,
     return result;
 }
 
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
@@ -438,7 +438,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
 
     boost::container::static_vector<Id, N> ids;
@@ -449,7 +449,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
         if (!flags.typed) {
             // Untyped loads have bounds checking per-component.
             ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
-                                                       result_i, alias == BufferAlias::F32));
+                                                       result_i, alias == PointerType::F32));
         } else {
             ids.push_back(result_i);
         }
@@ -459,7 +459,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
     if (flags.typed) {
         // Typed loads have single bounds check for the whole load.
         return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
-                                            alias == BufferAlias::F32);
+                                            alias == PointerType::F32);
     }
     return result;
 }
@@ -469,7 +469,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
     return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
@@ -480,7 +480,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
@@ -488,35 +488,35 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -546,7 +546,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
     emit_func();
 }
 
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                  Id value) {
     const auto flags = inst->Flags<IR::BufferInstInfo>();
@@ -555,7 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
 
     auto store = [&] {
@@ -586,7 +586,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpUConvert(ctx.U8, value)};
     EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
@@ -597,7 +597,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpUConvert(ctx.U16, value)};
@@ -606,35 +606,35 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
 }
 
 void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
@@ -70,6 +70,12 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
                          Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
+    if (info.dma_types != IR::Type::Void) {
+        SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
+    } else {
+        SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
+    }
+
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
     DefineInterfaces();
@@ -157,6 +163,30 @@ void EmitContext::DefineArithmeticTypes() {
     if (info.uses_fp64) {
         frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
     }
+
+    if (True(info.dma_types & IR::Type::F64)) {
+        physical_pointer_types[PointerType::F64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
+    }
+    if (True(info.dma_types & IR::Type::U64)) {
+        physical_pointer_types[PointerType::U64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
+    }
+    if (True(info.dma_types & IR::Type::F32)) {
+        physical_pointer_types[PointerType::F32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
+    }
+    if (True(info.dma_types & IR::Type::U32)) {
+        physical_pointer_types[PointerType::U32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
+    }
+    if (True(info.dma_types & IR::Type::F16)) {
+        physical_pointer_types[PointerType::F16] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
+    }
+    if (True(info.dma_types & IR::Type::U16)) {
+        physical_pointer_types[PointerType::U16] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
+    }
+
+    // We always want U8 if using DMA, for the fault readback buffer
+    if (info.dma_types != IR::Type::Void) {
+        physical_pointer_types[PointerType::U8] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+    }
 }
 
 void EmitContext::DefineInterfaces() {
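(Note: the U16 and U8 blocks above originally assigned into the PointerType::U32 slot, which would have clobbered the U32 pointer type; they are corrected here to their own slots.) With the pointer types cached per width, emitting an actual DMA access later reduces to converting a 64-bit device address into a physical pointer and loading through it. A minimal sketch of that direction; EmitDmaLoadU32 is hypothetical, the OpConvertUToPtr/OpLoad calls assume Sirit's generated signatures, and a production load through a PhysicalStorageBuffer pointer must additionally carry the Aligned memory-access operand:

// Hypothetical emitter sketch, not part of this commit.
Id EmitDmaLoadU32(EmitContext& ctx, Id device_addr) {
    // device_addr is a 64-bit integer Id, e.g. fetched from the BDA page table.
    const Id ptr_type = ctx.physical_pointer_types[EmitContext::PointerType::U32];
    const Id ptr = ctx.OpConvertUToPtr(ptr_type, device_addr);
    return ctx.OpLoad(ctx.U32[1], ptr); // real code: pass Aligned + alignment here
}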
@@ -198,7 +228,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
     // Can this be done with memory access? Like we do now with ReadConst
     const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
 
     const auto rsrc1{
         OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@@ -736,20 +766,24 @@ void EmitContext::DefineBuffers() {
 
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
+        if (True(desc.used_types & IR::Type::U64)) {
+            spv_buffer[PointerType::U64] =
+                DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U64);
+        }
         if (True(desc.used_types & IR::Type::U32)) {
-            spv_buffer[BufferAlias::U32] =
+            spv_buffer[PointerType::U32] =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
         }
         if (True(desc.used_types & IR::Type::F32)) {
-            spv_buffer[BufferAlias::F32] =
+            spv_buffer[PointerType::F32] =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
         }
         if (True(desc.used_types & IR::Type::U16)) {
-            spv_buffer[BufferAlias::U16] =
+            spv_buffer[PointerType::U16] =
                 DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
         }
         if (True(desc.used_types & IR::Type::U8)) {
-            spv_buffer[BufferAlias::U8] =
+            spv_buffer[PointerType::U8] =
                 DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
         }
         ++binding.unified;
@@ -133,12 +133,24 @@ public:
         return ConstantComposite(type, constituents);
     }
 
+    inline Id OpLabel(std::string_view label_name) {
+        last_label = Module::OpLabel(label_name);
+        return last_label;
+    }
+
+    inline Id OpLabel() {
+        last_label = Module::OpLabel();
+        return last_label;
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
     Stage stage;
     LogicalStage l_stage{};
 
+    Id last_label{};
+
     Id void_id{};
     Id U8{};
     Id S8{};
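The OpLabel wrappers shadow Sirit's versions so the context always remembers the most recently created label. Assuming each label is created immediately before its block begins, ctx.last_label then names the block currently being emitted, which is exactly what phi construction needs: OpPhi operands are (value, predecessor-label) pairs. A hypothetical illustration, assuming Sirit's usual variadic OpPhi:

// Hypothetical helper, not part of this commit.
Id EmitZeroIfOutOfBounds(EmitContext& ctx, Id in_bounds, Id value) {
    const Id source_block = ctx.last_label;    // block performing the check
    const Id ok_label = ctx.OpLabel();
    const Id merge_label = ctx.OpLabel();
    ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
    ctx.OpBranchConditional(in_bounds, ok_label, merge_label);
    ctx.AddLabel(ok_label);
    ctx.OpBranch(merge_label);
    ctx.AddLabel(merge_label);
    // source_block is the predecessor for the out-of-bounds path.
    return ctx.OpPhi(ctx.U32[1], value, ok_label, ctx.u32_zero_value, source_block);
}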
@@ -231,11 +243,14 @@ public:
         bool is_storage = false;
     };
 
-    enum class BufferAlias : u32 {
+    enum class PointerType : u32 {
         U8,
         U16,
+        F16,
         U32,
         F32,
+        U64,
+        F64,
         NumAlias,
     };
 
@@ -252,22 +267,35 @@ public:
         Id size;
         Id size_shorts;
         Id size_dwords;
-        std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
+        std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
 
-        const BufferSpv& operator[](BufferAlias alias) const {
+        const BufferSpv& operator[](PointerType alias) const {
             return aliases[u32(alias)];
         }
 
-        BufferSpv& operator[](BufferAlias alias) {
+        BufferSpv& operator[](PointerType alias) {
             return aliases[u32(alias)];
         }
     };
 
+    struct PhysicalPointerTypes {
+        std::array<Id, u32(PointerType::NumAlias)> types;
+
+        const Id& operator[](PointerType type) const {
+            return types[u32(type)];
+        }
+
+        Id& operator[](PointerType type) {
+            return types[u32(type)];
+        }
+    };
+
     Bindings& binding;
     boost::container::small_vector<Id, 16> buf_type_ids;
     boost::container::small_vector<BufferDefinition, 16> buffers;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
+    PhysicalPointerTypes physical_pointer_types;
 
     size_t flatbuf_index{};
     size_t bda_pagetable_index{};
@@ -211,7 +211,7 @@ struct Info {
     bool stores_tess_level_inner{};
     bool translation_failed{};
     bool has_readconst{};
-    bool uses_dma{};
+    IR::Type dma_types{IR::Type::Void};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
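Replacing the boolean with a type mask keeps the old "is DMA used at all?" query (compare against Void) while also recording which value widths need physical pointer types. A standalone approximation of the pattern; the enum values and helpers below are illustrative, while the real IR::Type flag operators and True() already exist in the tree:

#include <cstdint>

enum class Type : std::uint32_t { Void = 0, U32 = 1u << 5, F32 = 1u << 6 };

constexpr Type operator|(Type a, Type b) { return Type(std::uint32_t(a) | std::uint32_t(b)); }
constexpr Type operator&(Type a, Type b) { return Type(std::uint32_t(a) & std::uint32_t(b)); }
constexpr Type& operator|=(Type& a, Type b) { return a = a | b; }
constexpr bool True(Type t) { return t != Type::Void; }

struct Info {
    Type dma_types{Type::Void};
};

void MarkReadConst(Info& info) {
    info.dma_types |= Type::U32;          // what Visit() now does for ReadConst
}

bool UsesDma(const Info& info) {
    return info.dma_types != Type::Void;  // replaces the old uses_dma flag
}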
@@ -87,21 +87,7 @@ void Visit(Info& info, const IR::Inst& inst) {
         });
         info.has_readconst = true;
     }
-    if (!info.uses_dma) {
-        // For now, we only need U32, but we may need
-        // to add more types in the future for other purposes.
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            .inline_cbuf = AmdGpu::Buffer::Null(),
-            .buffer_type = BufferType::BdaPagetable,
-        });
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            .inline_cbuf = AmdGpu::Buffer::Null(),
-            .buffer_type = BufferType::FaultReadback,
-        });
-        info.uses_dma = true;
-    }
+    info.dma_types |= IR::Type::U32;
     break;
 case IR::Opcode::PackUfloat10_11_11:
     info.uses_pack_10_11_11 = true;
@@ -120,6 +106,19 @@ void CollectShaderInfoPass(IR::Program& program) {
             Visit(program.info, inst);
         }
     }
+
+    if (program.info.dma_types != IR::Type::Void) {
+        program.info.buffers.push_back({
+            .used_types = IR::Type::U64,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::BdaPagetable,
+        });
+        program.info.buffers.push_back({
+            .used_types = IR::Type::U8,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::FaultReadback,
+        });
+    }
 }
 
 } // namespace Shader::Optimization
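With the registration moved out of Visit(), the pass appends the two DMA support buffers once per program, and their element types hint at their roles. A hypothetical host-side mirror of that layout, inferred from the types above and the buffer-cache comments below rather than spelled out by this commit:

#include <cstdint>
#include <vector>

// Hypothetical mirror, not part of this commit.
struct DmaTrackingBuffers {
    // BdaPagetable: one 64-bit entry per tracked page, holding the page's
    // buffer device address, with bit 0 flagging pages already backed by a
    // GPU-local buffer (see the encoding sketch after the buffer-cache hunks).
    std::vector<std::uint64_t> bda_pagetable;
    // FaultReadback: byte-addressable scratch for the shader to report
    // accesses to pages whose entry has bit 0 clear, so the host can promote
    // them to GPU-local buffers.
    std::vector<std::uint8_t> fault_readback;
};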
@@ -372,10 +372,10 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
     bda_addrs.clear();
     bda_addrs.reserve(range_pages);
     for (u64 i = 0; i < range_pages; ++i) {
-        // Mark the page as host imported to let the shader know
+        // Don't mark the page as GPU local to let the shader know
         // so that it can notify us if it accesses the page, so we can
         // create a GPU local buffer.
-        bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1);
+        bda_addrs.push_back(bda_addr + (i << CACHING_PAGEBITS));
     }
     WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
@@ -531,8 +531,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     const u64 size_pages = size >> CACHING_PAGEBITS;
     bda_addrs.reserve(size_pages);
     for (u64 i = 0; i < size_pages; ++i) {
-        // Here, we do not set the host imported bit.
-        bda_addrs.push_back(new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS));
+        // Here, we mark the page as backed by a GPU local buffer.
+        bda_addrs.push_back((new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS)) | 0x1);
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
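Because every page-table entry is a CACHING_PAGEBITS-aligned device address, its low bit is free to carry state: ImportMemory now writes plain addresses (flag clear), and CreateBuffer sets the flag once a GPU-local buffer backs the page. A small sketch of that encoding; the helper names are hypothetical:

#include <cstdint>

using u64 = std::uint64_t;

constexpr u64 GPU_LOCAL_BIT = 0x1; // low bit of a page-aligned address is unused

constexpr u64 EncodeBdaEntry(u64 device_addr, bool gpu_local) {
    return gpu_local ? (device_addr | GPU_LOCAL_BIT) : device_addr;
}

constexpr bool IsGpuLocal(u64 entry) {
    return (entry & GPU_LOCAL_BIT) != 0; // shader-side test before DMA access
}

constexpr u64 EntryAddress(u64 entry) {
    return entry & ~GPU_LOCAL_BIT; // strip the flag before dereferencing
}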
|
@ -460,7 +460,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
BindBuffers(*stage, binding, push_data);
|
||||
BindTextures(*stage, binding);
|
||||
|
||||
dma_enabled |= stage->uses_dma;
|
||||
dma_enabled |= stage->dma_types != Shader::IR::Type::Void;
|
||||
}
|
||||
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|