From e741c3cc1067073dbaa743e2e1cf64549c083b66 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:43:50 +0300 Subject: [PATCH] shader_recompiler: Optimize common buffer access pattern --- .../ir/passes/resource_tracking_pass.cpp | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 0168bbf19..06ebad3e6 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -522,6 +522,22 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info, const AmdGpu::Buffer& buffer, u32 stride) { const auto inst_info = inst.Flags(); + const u32 inst_offset = inst_info.inst_offset.Value(); + const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; + const auto data_format = is_inst_typed + ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value()) + : buffer.GetDataFmt(); + const u32 shift = BufferAddressShift(inst, data_format); + const u32 mask = (1 << shift) - 1; + + // If address calculation is of the form "index * const_stride + offset" with offset constant + // and both const_stride and offset are divisible with the element size, apply shift directly. + if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable && + !buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) { + // buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift) + const IR::U32 index = IR::U32{inst.Arg(1)}; + return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift)); + } // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0) IR::U32 index = ir.Imm32(0U); @@ -538,7 +554,7 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In index = ir.IAdd(index, thread_id); } // offset = (inst_offen ? vgpr_offset : 0) + inst_offset - IR::U32 offset = ir.Imm32(inst_info.inst_offset.Value()); + IR::U32 offset = ir.Imm32(inst_offset); if (inst_info.offset_enable) { const IR::U32 vgpr_offset = inst_info.index_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)} @@ -571,12 +587,6 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In // buffer_offset = index * const_stride + offset buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset); } - - const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; - const auto data_format = is_inst_typed - ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value()) - : buffer.GetDataFmt(); - const u32 shift = BufferAddressShift(inst, data_format); if (shift != 0) { buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift)); }