shader_recompiler: Perform address shift at IR level

Buffer instructions now expect the address in the data unit they operate on. Performing the shift at the IR level lets us optimize some operations away in the common case.
IndecisiveTurtle 2025-06-26 01:18:01 +03:00
parent a0c1542691
commit 6fa5f51702
8 changed files with 216 additions and 144 deletions
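
To make the change concrete before the diff: buffer addresses previously reached the SPIR-V backend in bytes, and each emitter shifted them into element indices itself. The IR now carries the pre-shifted index, so a constant byte offset folds at compile time. A minimal standalone sketch of the arithmetic (plain integers, not the recompiler's IR types; names are illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// Element index = byte address >> log2(element size in bytes). The backend
// used to emit this shift per access; the IR now performs it once, where
// constant folding can remove it entirely for literal offsets.
constexpr uint32_t ElementIndex(uint32_t byte_address, uint32_t shift) {
    return byte_address >> shift;
}

int main() {
    constexpr uint32_t byte_address = 16;
    std::printf("u8  index: %u\n", ElementIndex(byte_address, 0)); // 16
    std::printf("u16 index: %u\n", ElementIndex(byte_address, 1)); // 8
    std::printf("u32 index: %u\n", ElementIndex(byte_address, 2)); // 4
    std::printf("u64 index: %u\n", ElementIndex(byte_address, 3)); // 2
}
```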

View File

@ -7,7 +7,11 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value};
@ -61,14 +65,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
return ctx.U32[1];
}
}();
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
});
}
@ -76,14 +79,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
});
}
@ -92,14 +94,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id cmp_value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
});
}
@ -107,14 +108,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if (const Id offset = buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
});
}
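
Each atomic helper above changes the same way: the explicit OpShiftRightLogical disappears, and the push-constant offset and bounds-check size are fetched pre-shifted for the access width. A condensed host-side sketch of the before/after pattern (stand-in types, not the real EmitContext):

```cpp
#include <cstdint>

enum class PointerSize { B8, B16, B32, B64 };

struct BufferStub {
    // One pre-shifted offset and size per access width (hypothetical layout).
    uint32_t offsets[4], sizes[4];
    uint32_t Offset(PointerSize s) const { return offsets[unsigned(s)]; }
    uint32_t Size(PointerSize s) const { return sizes[unsigned(s)]; }
};

// Before: byte address, with the shift emitted at every access site.
uint32_t OldDwordIndex(uint32_t address, uint32_t byte_offset) {
    return (address + byte_offset) >> 2;
}

// After: the address is already a dword index; offset and size arrive
// pre-shifted, so no per-access shift is emitted.
uint32_t NewDwordIndex(uint32_t address, const BufferStub& buf) {
    return address + buf.Offset(PointerSize::B32);
}
```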
@ -360,7 +360,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@ -368,7 +368,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
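
EmitDataAppend and EmitDataConsume only pick up the new alias lookup; they still implement a GDS counter with device-scope atomic increment/decrement. A host-side analogue of the intended semantics:

```cpp
#include <atomic>
#include <cstdint>

std::atomic<uint32_t> gds_counter{0};

// Mirrors OpAtomicIIncrement / OpAtomicIDecrement on the dword at gds_addr:
// both return the value observed before the update.
uint32_t DataAppend() { return gds_counter.fetch_add(1); }
uint32_t DataConsume() { return gds_counter.fetch_sub(1); }
```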

View File

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/attribute.h"
@ -11,8 +12,6 @@
#include <magic_enum/magic_enum.hpp>
#include "emit_spirv_bounds.h"
namespace Shader::Backend::SPIRV {
namespace {
@ -164,6 +163,7 @@ void EmitGetGotoVariable(EmitContext&) {
}
using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
@ -179,14 +179,15 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
template <PointerType type>
Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[type];
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
index = ctx.OpIAdd(ctx.U32[1], index, offset);
}
const auto [id, pointer_type] = buffer.Alias(type);
const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(value_type, ptr)};
if (Sirit::ValidId(buffer.size_dwords)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
}
return result;
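
The ReadConstBuffer guard keeps the load unconditional and selects zero after the fact, rather than branching. A host-side analogue of that OpSelect clamp (assumed semantics of the guard, not the emitter itself; the host version must short-circuit where SPIR-V can safely select after a robust load):

```cpp
#include <cstdint>
#include <vector>

uint32_t ReadConstBufferDword(const std::vector<uint32_t>& buf, uint32_t index) {
    const bool in_bounds = index < buf.size();
    // Mirrors OpSelect(value_type, in_bounds, result, 0): out-of-bounds
    // reads yield zero instead of trapping.
    return in_bounds ? buf[index] : 0u;
}
```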
@ -419,25 +420,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
template <u32 N, PointerType alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
constexpr bool is_float = alias == PointerType::F32;
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
const auto [id, pointer_type] = spv_buffer.Alias(alias);
boost::container::static_vector<Id, N> ids;
for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
if (!flags.typed) {
// Untyped loads have bounds checking per-component.
ids.push_back(LoadAccessBoundsCheck < 32, 1,
alias ==
PointerType::F32 > (ctx, index_i, spv_buffer.size_dwords, result_i));
ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
} else {
ids.push_back(result_i);
}
@ -446,33 +446,32 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
if (flags.typed) {
// Typed loads have single bounds check for the whole load.
return LoadAccessBoundsCheck < 32, N,
alias == PointerType::F32 > (ctx, index, spv_buffer.size_dwords, result);
return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
spv_buffer.Size(PointerSize::B32), result);
}
return result;
}
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpLoad(ctx.U8, ptr)};
return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.size, result);
return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
}
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpLoad(ctx.U16, ptr)};
return LoadAccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, result);
return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -493,14 +492,13 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U64];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
const Id result{ctx.OpLoad(ctx.U64, ptr)};
return LoadAccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, result);
return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
}
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -526,18 +524,18 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
template <u32 N, PointerType alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
constexpr bool is_float = alias == PointerType::F32;
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
const auto [id, pointer_type] = spv_buffer.Alias(alias);
auto store = [&] {
for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
auto store_i = [&] {
@ -546,8 +544,8 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
};
if (!flags.typed) {
// Untyped stores have bounds checking per-component.
AccessBoundsCheck<32, 1, alias == PointerType::F32>(
ctx, index_i, spv_buffer.size_dwords, store_i);
AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
store_i);
} else {
store_i();
}
@ -557,8 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
if (flags.typed) {
// Typed stores have single bounds check for the whole store.
AccessBoundsCheck<32, N, alias == PointerType::F32>(ctx, index, spv_buffer.size_dwords,
store);
AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
} else {
store();
}
@ -566,12 +563,12 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
AccessBoundsCheck<8>(ctx, address, spv_buffer.size, [&] {
AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
ctx.OpStore(ptr, value);
return Id{};
});
@ -579,13 +576,12 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
AccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, [&] {
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
ctx.OpStore(ptr, value);
return Id{};
});
@ -609,13 +605,12 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = spv_buffer[PointerType::U64];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
AccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, [&] {
const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
ctx.OpStore(ptr, value);
return Id{};
});
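
The typed/untyped split running through the load and store paths above is worth spelling out: untyped accesses are bounds-checked per component, typed ones once for the whole vector. A host-side illustration (assumed check placement, standing in for the SPIR-V bounds helpers):

```cpp
#include <array>
#include <cstdint>
#include <vector>

// Untyped: each component checked separately; only OOB lanes drop to zero.
std::array<uint32_t, 4> LoadUntyped(const std::vector<uint32_t>& buf, uint32_t idx) {
    std::array<uint32_t, 4> out{};
    for (uint32_t i = 0; i < 4; ++i) {
        out[i] = (idx + i < buf.size()) ? buf[idx + i] : 0u;
    }
    return out;
}

// Typed: a single check covers the whole load; a partial overlap past the
// end zeroes every lane.
std::array<uint32_t, 4> LoadTyped(const std::vector<uint32_t>& buf, uint32_t idx) {
    std::array<uint32_t, 4> out{};
    if (idx + 4 <= buf.size()) {
        for (uint32_t i = 0; i < 4; ++i) {
            out[i] = buf[idx + i];
        }
    }
    return out;
}
```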

View File

@ -213,7 +213,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
// Can this be done with memory access? Like we do now with ReadConst
const auto& srt_flatbuf = buffers[flatbuf_index];
ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
const auto [id, pointer_type] = srt_flatbuf.Alias(PointerType::U32);
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@ -229,38 +229,70 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
}
void EmitContext::DefineBufferProperties() {
if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
return;
}
for (u32 i = 0; i < buffers.size(); i++) {
BufferDefinition& buffer = buffers[i];
auto& buffer = buffers[i];
const auto& desc = info.buffers[i];
const u32 binding = buffer.binding;
if (buffer.buffer_type != BufferType::Guest) {
continue;
}
const u32 binding = buffer.binding;
const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
Name(buffer.offset, fmt::format("buf{}_off", binding));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
// Only load size if performing bounds checks and the buffer is both guest and not inline.
if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
const BufferResource& desc = info.buffers[i];
if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
buffer.size = ConstU32(desc.inline_cbuf.GetSize());
} else {
buffer.size = GetBufferSize(desc.sharp_idx);
// Only load and apply buffer offsets if host GPU alignment is larger than guest.
if (profile.needs_buffer_offsets) {
const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
Name(buf_offset, fmt::format("buf{}_off", binding));
buffer.Offset(PointerSize::B8) = buf_offset;
if (True(desc.used_types & IR::Type::U16)) {
const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
buffer.Offset(PointerSize::B16) = buf_word_offset;
}
if (True(desc.used_types & IR::Type::U32)) {
const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
buffer.Offset(PointerSize::B32) = buf_dword_offset;
}
if (True(desc.used_types & IR::Type::U64)) {
const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
buffer.Offset(PointerSize::B64) = buf_qword_offset;
}
}
// Only load size if performing bounds checks.
if (!profile.supports_robust_buffer_access) {
const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
? ConstU32(desc.inline_cbuf.GetSize())
: GetBufferSize(desc.sharp_idx)};
Name(buf_size, fmt::format("buf{}_size", binding));
buffer.Size(PointerSize::B8) = buf_size;
if (True(desc.used_types & IR::Type::U16)) {
const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
Name(buf_word_size, fmt::format("buf{}_short_size", binding));
buffer.Size(PointerSize::B16) = buf_word_size;
}
if (True(desc.used_types & IR::Type::U32)) {
const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
buffer.Size(PointerSize::B32) = buf_dword_size;
}
if (True(desc.used_types & IR::Type::U64)) {
const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
buffer.Size(PointerSize::B64) = buf_qword_size;
}
Name(buffer.size, fmt::format("buf{}_size", binding));
buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
}
}
}
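
DefineBufferProperties packs one 8-bit byte offset per binding into the push-constant block: four offsets per 32-bit word, four words per uvec4 slot (relative to PushData::BufOffsetIndex). A standalone check of the index math used above (the struct name is illustrative):

```cpp
#include <cstdint>

struct OffsetLocation {
    uint32_t vec_index;  // which uvec4 slot: binding >> 4, 16 bindings each
    uint32_t component;  // which 32-bit component: (binding & 0xf) >> 2
    uint32_t bit_offset; // bit position of the 8-bit field: (binding & 0x3) << 3
};

constexpr OffsetLocation LocateBufferOffset(uint32_t binding) {
    return {binding >> 4, (binding & 0xf) >> 2, (binding & 0x3) << 3};
}

// Mirrors OpBitFieldUExtract(value, offset, 8) on the loaded word.
constexpr uint32_t ExtractOffset(uint32_t word, uint32_t bit_offset) {
    return (word >> bit_offset) & 0xffu;
}

static_assert(LocateBufferOffset(5).component == 1);  // binding 5 -> word 1
static_assert(LocateBufferOffset(5).bit_offset == 8); // byte 1 of that word
```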
@ -752,8 +784,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
};
void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access &&
info.readconst_types == Info::ReadConstType::None) {
if (!profile.supports_robust_buffer_access && !info.uses_dma) {
// In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
@ -782,23 +813,23 @@ void EmitContext::DefineBuffers() {
// Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
if (True(desc.used_types & IR::Type::U64)) {
spv_buffer[PointerType::U64] =
spv_buffer.Alias(PointerType::U64) =
DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
}
if (True(desc.used_types & IR::Type::U32)) {
spv_buffer[PointerType::U32] =
spv_buffer.Alias(PointerType::U32) =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
}
if (True(desc.used_types & IR::Type::F32)) {
spv_buffer[PointerType::F32] =
spv_buffer.Alias(PointerType::F32) =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
}
if (True(desc.used_types & IR::Type::U16)) {
spv_buffer[PointerType::U16] =
spv_buffer.Alias(PointerType::U16) =
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
}
if (True(desc.used_types & IR::Type::U8)) {
spv_buffer[PointerType::U8] =
spv_buffer.Alias(PointerType::U8) =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
}
++binding.unified;
@ -1127,7 +1158,7 @@ Id EmitContext::DefineGetBdaPointer() {
const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
const auto page32{OpUConvert(U32[1], page)};
const auto& bda_buffer{buffers[bda_pagetable_index]};
const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
const auto bda{OpLoad(U64, bda_ptr)};
@ -1139,7 +1170,7 @@ Id EmitContext::DefineGetBdaPointer() {
// First-time access, mark as fault
AddLabel(fault_label);
const auto& fault_buffer{buffers[fault_buffer_index]};
const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U8);
const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
@ -1191,7 +1222,8 @@ Id EmitContext::DefineReadConst(bool dynamic) {
const auto& flatbuf_buffer{buffers[flatbuf_index]};
ASSERT(flatbuf_buffer.binding >= 0 &&
flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
flatbuf_buffer.Alias(PointerType::U32);
const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
flatbuf_offset)};
return OpLoad(U32[1], ptr);

View File

@ -42,17 +42,6 @@ public:
Bindings& binding);
~EmitContext();
enum class PointerType : u32 {
U8,
U16,
F16,
U32,
F32,
U64,
F64,
NumAlias,
};
Id Def(const IR::Value& value);
void DefineBufferProperties();
@ -294,6 +283,24 @@ public:
bool is_storage = false;
};
enum class PointerType : u32 {
U8,
U16,
U32,
F32,
U64,
F64,
NumAlias,
};
enum class PointerSize : u32 {
B8,
B16,
B32,
B64,
NumClass,
};
struct BufferSpv {
Id id;
Id pointer_type;
@ -302,20 +309,23 @@ public:
struct BufferDefinition {
u32 binding;
BufferType buffer_type;
Id offset;
Id offset_dwords;
Id size;
Id size_shorts;
Id size_dwords;
Id size_qwords;
std::array<Id, u32(PointerSize::NumClass)> offsets;
std::array<Id, u32(PointerSize::NumClass)> sizes;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](PointerType alias) const {
return aliases[u32(alias)];
template <class Self>
auto& Alias(this Self& self, PointerType alias) {
return self.aliases[u32(alias)];
}
BufferSpv& operator[](PointerType alias) {
return aliases[u32(alias)];
template <class Self>
auto& Offset(this Self& self, PointerSize size) {
return self.offsets[u32(size)];
}
template <class Self>
auto& Size(this Self& self, PointerSize size) {
return self.sizes[u32(size)];
}
};
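
The operator[] pair has become a single accessor using C++23 explicit object parameters ("deducing this"): self binds as BufferDefinition& or const BufferDefinition&, so one template serves both const and mutable callers. A minimal standalone demonstration of the idiom (requires C++23; names are illustrative):

```cpp
#include <array>
#include <cstdint>

struct Table {
    std::array<uint32_t, 4> slots{};

    // Self deduces as Table& or const Table&; the deduced return type is
    // uint32_t& or const uint32_t& accordingly -- no duplicated overloads.
    template <class Self>
    auto& At(this Self& self, uint32_t i) {
        return self.slots[i];
    }
};

int main() {
    Table t;
    t.At(1) = 42;                  // mutable access through Table&
    const Table& ct = t;
    return ct.At(1) == 42 ? 0 : 1; // const access through const Table&
}
```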

View File

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <bit>
#include "common/assert.h"
#include "shader_recompiler/frontend/translate/translate.h"

View File

@ -105,6 +105,32 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
}
}
u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferU8:
case IR::Opcode::StoreBufferU8:
return 0;
case IR::Opcode::LoadBufferU16:
case IR::Opcode::StoreBufferU16:
return 1;
case IR::Opcode::LoadBufferU64:
case IR::Opcode::StoreBufferU64:
case IR::Opcode::BufferAtomicIAdd64:
return 3;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32: {
const auto num_comps = AmdGpu::NumComponents(data_format);
const auto num_bytes = (AmdGpu::NumBitsPerBlock(data_format) >> 3) / num_comps;
return std::bit_width(num_bytes) - 1;
}
case IR::Opcode::ReadConstBuffer:
// Provided address is already in dwords
return 0;
default:
return 2;
}
}
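
For the format branch above, the shift is log2 of the per-component byte width. A standalone check of that computation (the real inputs come from AmdGpu::NumComponents and AmdGpu::NumBitsPerBlock; the values here are illustrative):

```cpp
#include <bit>
#include <cstdint>

constexpr uint32_t FormatShift(uint32_t bits_per_block, uint32_t num_comps) {
    const uint32_t bytes_per_comp = (bits_per_block >> 3) / num_comps;
    return std::bit_width(bytes_per_comp) - 1;
}

static_assert(FormatShift(32, 4) == 0);  // 8-bit components:  byte addressing
static_assert(FormatShift(64, 4) == 1);  // 16-bit components: word addressing
static_assert(FormatShift(128, 4) == 2); // 32-bit components: dword addressing
```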
bool IsImageAtomicInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageAtomicIAdd32:
@ -545,6 +571,15 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
// buffer_offset = index * const_stride + offset
buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
}
const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
const auto data_format = is_inst_typed
? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
: buffer.GetDataFmt();
const u32 shift = BufferAddressShift(inst, data_format);
if (shift != 0) {
buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
}
return buffer_offset;
}
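
This is the spot where the shift now happens: on the IR value, before the backend ever sees it. When the operands are literals, the emitter's constant folding can reduce the whole address computation, which is the optimization the commit message refers to. A hypothetical folded view:

```cpp
#include <cstdint>

// Stand-in for the IR emitter: with compile-time operands the address
// computation, including the new shift, folds to a literal element index.
constexpr uint32_t BufferIndex(uint32_t index, uint32_t stride, uint32_t offset,
                               uint32_t shift) {
    return (index * stride + offset) >> shift;
}

static_assert(BufferIndex(2, 16, 4, 2) == 9); // (2*16 + 4) >> 2 == 9 dwords
```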

View File

@ -35,7 +35,7 @@ struct Profile {
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};
bool needs_lds_barriers{};
u64 min_ssbo_alignment{};
bool needs_buffer_offsets{};
u64 max_ubo_size{};
u32 max_viewport_width{};
u32 max_viewport_height{};

View File

@ -225,6 +225,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
instance.GetDriverID() == vk::DriverId::eMoltenvk,
.needs_buffer_offsets = instance.StorageMinAlignment() > 4,
// When binding a UBO, we calculate its size considering the offset in the larger buffer
// cache underlying resource. In some cases, it may produce sizes exceeding the system
// maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
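
The new profile flag ties the push-constant offsets to host alignment: they are only needed when the device's minimum storage-buffer offset alignment exceeds the guest's 4 bytes. A hypothetical illustration of the split between the aligned bind point and the residual offset:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t guest_addr = 0x10004; // guest buffer start, 4-byte aligned
    const uint64_t min_align = 16;       // e.g. a host requiring 16-byte binds
    const uint64_t bound_base = guest_addr & ~(min_align - 1);
    const uint32_t push_offset = static_cast<uint32_t>(guest_addr - bound_base);
    // The descriptor binds bound_base; the 8-bit push_offset (here 4) is what
    // DefineBufferProperties() unpacks and re-applies on the shader side.
    std::printf("base=0x%llx offset=%u\n",
                static_cast<unsigned long long>(bound_base), push_offset);
}
```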