From 6fa5f51702e9ec98cfd85cc5ed4df59a09bf13ad Mon Sep 17 00:00:00 2001
From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
Date: Thu, 26 Jun 2025 01:18:01 +0300
Subject: [PATCH] shader_recompiler: Perform address shift on IR level

Buffer instructions now expect the address in the data unit they operate on.
Performing the shift at the IR level allows us to optimize some operations
away in the common case.
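Note (illustrative commentary, not part of the diff; the stride/offset numbers
are hypothetical): previously the backend emitted the unit shift on every
access at SPIR-V emission time, e.g. for a dword access:

    // old backend pattern: scale the byte address on every access
    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));

With the shift folded into the IR address calculation, constant folding can
simplify the common case where stride and offset are multiples of the element
size; for a hypothetical stride of 16 bytes and constant offset of 4:

    (index * 16 + 4) >> 2  ==>  index * 4 + 1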
---
 .../backend/spirv/emit_spirv_atomic.cpp       |  52 ++++----
 .../spirv/emit_spirv_context_get_set.cpp      | 111 +++++++++---------
 .../backend/spirv/spirv_emit_context.cpp      | 106 +++++++++++------
 .../backend/spirv/spirv_emit_context.h        |  52 ++++----
 .../frontend/translate/scalar_alu.cpp         |   1 -
 .../ir/passes/resource_tracking_pass.cpp      |  35 ++++++
 src/shader_recompiler/profile.h               |   2 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   1 +
 8 files changed, 216 insertions(+), 144 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 97e455ff8..3c833b87d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -7,7 +7,11 @@
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"

 namespace Shader::Backend::SPIRV {
+
 namespace {
+using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
+
 std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
     const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
     const Id semantics{ctx.u32_zero_value};
@@ -61,14 +65,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
             return ctx.U32[1];
         }
     }();
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
     });
 }
@@ -76,14 +79,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
 Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                          Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
     });
 }
@@ -92,14 +94,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
                           Id cmp_value,
                           Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
     });
 }
@@ -107,14 +108,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
+    return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
         return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
     });
 }
@@ -360,7 +360,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co

 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@@ -368,7 +368,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {

 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
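Note (illustrative commentary, not part of the diff): the atomic helpers above
now receive 'address' already scaled to the element width, so the bounds check
compares like units (dword index against dword size, qword index against qword
size). A hypothetical helper expressing the unit relationship; the shift
amounts match the ConstU32(1U)/ConstU32(2U)/ConstU32(3U) shifts performed in
DefineBufferProperties further below (u32 as in common/types.h):

    // log2(bytes per element) for each pointer size class
    constexpr u32 UnitShift(EmitContext::PointerSize size) {
        switch (size) {
        case EmitContext::PointerSize::B8:
            return 0;
        case EmitContext::PointerSize::B16:
            return 1;
        case EmitContext::PointerSize::B32:
            return 2;
        case EmitContext::PointerSize::B64:
            return 3;
        default:
            return 0;
        }
    }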
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index ccbe54d0a..564fb3f80 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -3,6 +3,7 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/ir/attribute.h"
@@ -11,8 +12,6 @@

 #include 

-#include "emit_spirv_bounds.h"
-
 namespace Shader::Backend::SPIRV {
 namespace {
@@ -164,6 +163,7 @@ void EmitGetGotoVariable(EmitContext&) {
 }

 using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;

 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
@@ -179,14 +179,15 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
 template <PointerType type>
 Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     const auto& buffer = ctx.buffers[handle];
-    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const auto [id, pointer_type] = buffer[type];
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        index = ctx.OpIAdd(ctx.U32[1], index, offset);
+    }
+    const auto [id, pointer_type] = buffer.Alias(type);
     const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpLoad(value_type, ptr)};
-
-    if (Sirit::ValidId(buffer.size_dwords)) {
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+    if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
         return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
     }
     return result;
@@ -419,25 +420,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {

 template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    constexpr bool is_float = alias == PointerType::F32;
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
+    const auto [id, pointer_type] = spv_buffer.Alias(alias);
     boost::container::static_vector<Id, N> ids;
     for (u32 i = 0; i < N; i++) {
-        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+        const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
         const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
         const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
         if (!flags.typed) {
             // Untyped loads have bounds checking per-component.
-            ids.push_back(LoadAccessBoundsCheck < 32, 1,
-                          alias ==
-                              PointerType::F32 > (ctx, index_i, spv_buffer.size_dwords, result_i));
+            ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
+                ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
         } else {
             ids.push_back(result_i);
         }
@@ -446,33 +446,32 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
     const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
     if (flags.typed) {
         // Typed loads have single bounds check for the whole load.
-        return LoadAccessBoundsCheck < 32, N,
-               alias == PointerType::F32 > (ctx, index, spv_buffer.size_dwords, result);
+        return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
+                                                      spv_buffer.Size(PointerSize::B32), result);
     }
     return result;
 }

 Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U8, ptr)};
-    return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.size, result);
+    return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
 }

 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U16, ptr)};
-    return LoadAccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, result);
+    return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
 }

 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -493,14 +492,13 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)

 Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U64, ptr)};
-    return LoadAccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, result);
+    return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
 }

 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -526,18 +524,18 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr

 template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                  Id value) {
+    constexpr bool is_float = alias == PointerType::F32;
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
+    const auto [id, pointer_type] = spv_buffer.Alias(alias);
     auto store = [&] {
         for (u32 i = 0; i < N; i++) {
-            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+            const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
             const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
             const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
             auto store_i = [&] {
@@ -546,8 +544,8 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
             };
             if (!flags.typed) {
                 // Untyped stores have bounds checking per-component.
-                AccessBoundsCheck<32, 1, alias == PointerType::F32>(
-                    ctx, index_i, spv_buffer.size_dwords, store_i);
+                AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
+                                                   store_i);
             } else {
                 store_i();
             }
@@ -557,8 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I

     if (flags.typed) {
         // Typed stores have single bounds check for the whole store.
-        AccessBoundsCheck<32, N, alias == PointerType::F32>(ctx, index, spv_buffer.size_dwords,
-                                                            store);
+        AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
     } else {
         store();
     }
@@ -566,12 +563,12 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I

 void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    AccessBoundsCheck<8>(ctx, address, spv_buffer.size, [&] {
+    AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
         ctx.OpStore(ptr, value);
         return Id{};
     });
@@ -579,13 +576,12 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v

 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    AccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, [&] {
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
         ctx.OpStore(ptr, value);
         return Id{};
     });
@@ -609,13 +605,12 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre

 void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
-    AccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, [&] {
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
+    AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
         ctx.OpStore(ptr, value);
         return Id{};
     });
 }
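Note (illustrative commentary, not part of the diff): all scalar widths now
share one shape, since the IR hands the backend an index already scaled to the
element size; the per-width OpShiftRightLogical calls are gone. Sketch of what
EmitLoadBufferB32xN emits for N = 3 (names as in the code above):

    // dword indices address, address + 1, address + 2 feed OpAccessChain
    // directly; untyped loads bounds-check each component against the dword
    // size, while a typed load performs a single check for the whole range.
    const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));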
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 96b0bb4e3..b6397c6fd 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -213,7 +213,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
     // Can this be done with memory access? Like we do now with ReadConst
     const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
-    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
+    const auto [id, pointer_type] = srt_flatbuf.Alias(PointerType::U32);
     const auto rsrc1{
         OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@@ -229,38 +229,70 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
 }

 void EmitContext::DefineBufferProperties() {
+    if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
+        return;
+    }
     for (u32 i = 0; i < buffers.size(); i++) {
-        BufferDefinition& buffer = buffers[i];
+        auto& buffer = buffers[i];
+        const auto& desc = info.buffers[i];
+        const u32 binding = buffer.binding;
         if (buffer.buffer_type != BufferType::Guest) {
             continue;
         }
-        const u32 binding = buffer.binding;
-        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
-        const u32 comp = (binding & 0xf) >> 2;
-        const u32 offset = (binding & 0x3) << 3;
-        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
-                                   push_data_block, ConstU32(half), ConstU32(comp))};
-        const Id value{OpLoad(U32[1], ptr)};
-        buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
-        Name(buffer.offset, fmt::format("buf{}_off", binding));
-        buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
-        Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
-        // Only load size if performing bounds checks and the buffer is both guest and not inline.
-        if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
-            const BufferResource& desc = info.buffers[i];
-            if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
-                buffer.size = ConstU32(desc.inline_cbuf.GetSize());
-            } else {
-                buffer.size = GetBufferSize(desc.sharp_idx);
+        // Only load and apply buffer offsets if host GPU alignment is larger than guest.
+        if (profile.needs_buffer_offsets) {
+            const u32 half = PushData::BufOffsetIndex + (binding >> 4);
+            const u32 comp = (binding & 0xf) >> 2;
+            const u32 offset = (binding & 0x3) << 3;
+            const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
+                                       push_data_block, ConstU32(half), ConstU32(comp))};
+            const Id value{OpLoad(U32[1], ptr)};
+
+            const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
+            Name(buf_offset, fmt::format("buf{}_off", binding));
+            buffer.Offset(PointerSize::B8) = buf_offset;
+
+            if (True(desc.used_types & IR::Type::U16)) {
+                const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
+                Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
+                buffer.Offset(PointerSize::B16) = buf_word_offset;
+            }
+            if (True(desc.used_types & IR::Type::U32)) {
+                const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
+                Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
+                buffer.Offset(PointerSize::B32) = buf_dword_offset;
+            }
+            if (True(desc.used_types & IR::Type::U64)) {
+                const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
+                Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
+                buffer.Offset(PointerSize::B64) = buf_qword_offset;
+            }
+        }
+
+        // Only load size if performing bounds checks.
+        if (!profile.supports_robust_buffer_access) {
+            const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
+                                  ? ConstU32(desc.inline_cbuf.GetSize())
+                                  : GetBufferSize(desc.sharp_idx)};
+            Name(buf_size, fmt::format("buf{}_size", binding));
+            buffer.Size(PointerSize::B8) = buf_size;
+
+            if (True(desc.used_types & IR::Type::U16)) {
+                const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
+                Name(buf_word_size, fmt::format("buf{}_short_size", binding));
+                buffer.Size(PointerSize::B16) = buf_word_size;
+            }
+            if (True(desc.used_types & IR::Type::U32)) {
+                const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
+                Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
+                buffer.Size(PointerSize::B32) = buf_dword_size;
+            }
+            if (True(desc.used_types & IR::Type::U64)) {
+                const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
+                Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
+                buffer.Size(PointerSize::B64) = buf_qword_size;
             }
-            Name(buffer.size, fmt::format("buf{}_size", binding));
-            buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
-            Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
-            buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
-            Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
-            buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
-            Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
         }
     }
 }
@@ -752,8 +784,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };

 void EmitContext::DefineBuffers() {
-    if (!profile.supports_robust_buffer_access &&
-        info.readconst_types == Info::ReadConstType::None) {
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
         // In case Flatbuf has not already been bound by IR and is needed
         // to query buffer sizes, bind it now.
         info.buffers.push_back({
@@ -782,23 +813,23 @@ void EmitContext::DefineBuffers() {
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
         if (True(desc.used_types & IR::Type::U64)) {
-            spv_buffer[PointerType::U64] =
+            spv_buffer.Alias(PointerType::U64) =
                 DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
         }
         if (True(desc.used_types & IR::Type::U32)) {
-            spv_buffer[PointerType::U32] =
+            spv_buffer.Alias(PointerType::U32) =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
         }
         if (True(desc.used_types & IR::Type::F32)) {
-            spv_buffer[PointerType::F32] =
+            spv_buffer.Alias(PointerType::F32) =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
         }
         if (True(desc.used_types & IR::Type::U16)) {
-            spv_buffer[PointerType::U16] =
+            spv_buffer.Alias(PointerType::U16) =
                 DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
         }
         if (True(desc.used_types & IR::Type::U8)) {
-            spv_buffer[PointerType::U8] =
+            spv_buffer.Alias(PointerType::U8) =
                 DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
         }
         ++binding.unified;
@@ -1127,7 +1158,7 @@ Id EmitContext::DefineGetBdaPointer() {
         const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
         const auto page32{OpUConvert(U32[1], page)};
         const auto& bda_buffer{buffers[bda_pagetable_index]};
-        const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
+        const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
        const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
         const auto bda{OpLoad(U64, bda_ptr)};
@@ -1139,7 +1170,7 @@ Id EmitContext::DefineGetBdaPointer() {
         // First time acces, mark as fault
         AddLabel(fault_label);
         const auto& fault_buffer{buffers[fault_buffer_index]};
-        const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
+        const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U8);
         const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
         const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
         const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
@@ -1191,7 +1222,8 @@ Id EmitContext::DefineReadConst(bool dynamic) {
         const auto& flatbuf_buffer{buffers[flatbuf_index]};
         ASSERT(flatbuf_buffer.binding >= 0 && flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-        const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
+        const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+            flatbuf_buffer.Alias(PointerType::U32);
         const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
                                      flatbuf_offset)};
         return OpLoad(U32[1], ptr);
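Note (worked example, not part of the diff): DefineBufferProperties reads one
8-bit byte offset per buffer from the push-constant block, packed four to a
dword. For binding = 5 the expressions above evaluate to:

    // half = PushData::BufOffsetIndex + (5 >> 4) = PushData::BufOffsetIndex + 0
    // comp = (5 & 0xf) >> 2 = 1    // second dword of that element
    // off  = (5 & 0x3) << 3 = 8    // the byte offset sits in bits [15:8]

The extracted byte offset is then pre-shifted once per used unit size
(B16/B32/B64), which is what lets the per-access shifts disappear from the
emitted code.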
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 03fefa513..f8c6416e8 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -42,17 +42,6 @@ public:
                 Bindings& binding);
     ~EmitContext();

-    enum class PointerType : u32 {
-        U8,
-        U16,
-        F16,
-        U32,
-        F32,
-        U64,
-        F64,
-        NumAlias,
-    };
-
     Id Def(const IR::Value& value);

     void DefineBufferProperties();
@@ -294,6 +283,24 @@
         bool is_storage = false;
     };

+    enum class PointerType : u32 {
+        U8,
+        U16,
+        U32,
+        F32,
+        U64,
+        F64,
+        NumAlias,
+    };
+
+    enum class PointerSize : u32 {
+        B8,
+        B16,
+        B32,
+        B64,
+        NumClass,
+    };
+
     struct BufferSpv {
         Id id;
         Id pointer_type;
@@ -302,20 +309,23 @@
     struct BufferDefinition {
         u32 binding;
         BufferType buffer_type;
-        Id offset;
-        Id offset_dwords;
-        Id size;
-        Id size_shorts;
-        Id size_dwords;
-        Id size_qwords;
+        std::array<Id, u32(PointerSize::NumClass)> offsets;
+        std::array<Id, u32(PointerSize::NumClass)> sizes;
         std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;

-        const BufferSpv& operator[](PointerType alias) const {
-            return aliases[u32(alias)];
+        template <typename Self>
+        auto& Alias(this Self& self, PointerType alias) {
+            return self.aliases[u32(alias)];
         }

-        BufferSpv& operator[](PointerType alias) {
-            return aliases[u32(alias)];
+        template <typename Self>
+        auto& Offset(this Self& self, PointerSize size) {
+            return self.offsets[u32(size)];
+        }
+
+        template <typename Self>
+        auto& Size(this Self& self, PointerSize size) {
+            return self.sizes[u32(size)];
         }
     };
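Note (illustrative commentary, not part of the diff): the new accessors use
C++23 explicit object parameters ("deducing this"), so one member template
replaces the old const/non-const operator[] pair. Usage sketch; handle and
some_id are hypothetical:

    const auto& buf = ctx.buffers[handle];
    const Id dword_size = buf.Size(PointerSize::B32);       // deduces const access
    ctx.buffers[handle].Offset(PointerSize::B8) = some_id;  // deduces mutable access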
diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
index 7beb594c3..48f977f49 100644
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@@ -1,7 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include 
 #include "common/assert.h"
 #include "shader_recompiler/frontend/translate/translate.h"
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 2e9b78f0e..0168bbf19 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -105,6 +105,32 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     }
 }

+u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadBufferU8:
+    case IR::Opcode::StoreBufferU8:
+        return 0;
+    case IR::Opcode::LoadBufferU16:
+    case IR::Opcode::StoreBufferU16:
+        return 1;
+    case IR::Opcode::LoadBufferU64:
+    case IR::Opcode::StoreBufferU64:
+    case IR::Opcode::BufferAtomicIAdd64:
+        return 3;
+    case IR::Opcode::LoadBufferFormatF32:
+    case IR::Opcode::StoreBufferFormatF32: {
+        const auto num_comps = AmdGpu::NumComponents(data_format);
+        const auto num_bytes = (AmdGpu::NumBitsPerBlock(data_format) >> 3) / num_comps;
+        return std::bit_width(num_bytes) - 1;
+    }
+    case IR::Opcode::ReadConstBuffer:
+        // Provided address is already in dwords
+        return 0;
+    default:
+        return 2;
+    }
+}
+
 bool IsImageAtomicInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::ImageAtomicIAdd32:
@@ -545,6 +571,15 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
         // buffer_offset = index * const_stride + offset
         buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
     }
+
+    const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
+    const auto data_format = is_inst_typed
+                                 ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
+                                 : buffer.GetDataFmt();
+    const u32 shift = BufferAddressShift(inst, data_format);
+    if (shift != 0) {
+        buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
+    }
     return buffer_offset;
 }
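Note (worked example, not part of the diff): for the format-load case, assume
a hypothetical 32_32_32_32 sharp, i.e. NumComponents = 4 and
NumBitsPerBlock = 128:

    num_bytes = (128 >> 3) / 4 = 4
    shift     = std::bit_width(4u) - 1 = 2    // dword units

which matches the default case; a 16-bit-per-component format such as 16_16
(32 bits per block, 2 components) likewise yields num_bytes = 2 and shift = 1.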
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index bcdf86962..d7eb307b6 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -35,7 +35,7 @@ struct Profile {
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
     bool needs_lds_barriers{};
-    u64 min_ssbo_alignment{};
+    bool needs_buffer_offsets{};
     u64 max_ubo_size{};
     u32 max_viewport_width{};
     u32 max_viewport_height{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1d8ac4823..831995339 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -225,6 +225,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
             instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
         .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
                               instance.GetDriverID() == vk::DriverId::eMoltenvk,
+        .needs_buffer_offsets = instance.StorageMinAlignment() > 4,
         // When binding a UBO, we calculate its size considering the offset in the larger buffer
         // cache underlying resource. In some cases, it may produce sizes exceeding the system
         // maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
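Closing note (commentary; the buffer-cache interaction is inferred from the
push-constant offset handling above and may not match the implementation
exactly): needs_buffer_offsets replaces the old min_ssbo_alignment field and
is set when the host's minimum storage-buffer alignment exceeds the 4 bytes
the guest assumes (instance.StorageMinAlignment() > 4). Only then does
DefineBufferProperties load per-buffer byte offsets from push constants; on
hosts with 4-byte alignment and robust buffer access, the function now
returns immediately and no offset or size constants are emitted at all.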