mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-23 02:24:38 +00:00
shader_recompiler: Optimize common buffer access pattern
This commit is contained in:
parent
6fa5f51702
commit
e741c3cc10
@ -522,6 +522,22 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
|
||||
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
|
||||
const AmdGpu::Buffer& buffer, u32 stride) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const u32 inst_offset = inst_info.inst_offset.Value();
|
||||
const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
|
||||
const auto data_format = is_inst_typed
|
||||
? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
|
||||
: buffer.GetDataFmt();
|
||||
const u32 shift = BufferAddressShift(inst, data_format);
|
||||
const u32 mask = (1 << shift) - 1;
|
||||
|
||||
// If address calculation is of the form "index * const_stride + offset" with offset constant
|
||||
// and both const_stride and offset are divisible with the element size, apply shift directly.
|
||||
if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable &&
|
||||
!buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) {
|
||||
// buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift)
|
||||
const IR::U32 index = IR::U32{inst.Arg(1)};
|
||||
return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift));
|
||||
}
|
||||
|
||||
// index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
|
||||
IR::U32 index = ir.Imm32(0U);
|
||||
@ -538,7 +554,7 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
|
||||
index = ir.IAdd(index, thread_id);
|
||||
}
|
||||
// offset = (inst_offen ? vgpr_offset : 0) + inst_offset
|
||||
IR::U32 offset = ir.Imm32(inst_info.inst_offset.Value());
|
||||
IR::U32 offset = ir.Imm32(inst_offset);
|
||||
if (inst_info.offset_enable) {
|
||||
const IR::U32 vgpr_offset = inst_info.index_enable
|
||||
? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)}
|
||||
@ -571,12 +587,6 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
|
||||
// buffer_offset = index * const_stride + offset
|
||||
buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
|
||||
}
|
||||
|
||||
const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
|
||||
const auto data_format = is_inst_typed
|
||||
? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
|
||||
: buffer.GetDataFmt();
|
||||
const u32 shift = BufferAddressShift(inst, data_format);
|
||||
if (shift != 0) {
|
||||
buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user