mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-09 13:19:00 +00:00
Allow vector and scalar offset in buffer address arg to LoadBuffer/StoreBuffer (#3439)
* Allow vector and scalar offset in buffer address arg to LoadBuffer/StoreBuffer
* remove is_ring check
* fix atomics and update pattern matching for tess factor stores
* remove old asserts about soffset
* small fixes
* copyright
* Handle sgpr initialization for 2 special hull shader values, including tess factor buffer offset
This commit is contained in:
@@ -926,6 +926,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||||||
src/shader_recompiler/ir/opcodes.cpp
|
src/shader_recompiler/ir/opcodes.cpp
|
||||||
src/shader_recompiler/ir/opcodes.h
|
src/shader_recompiler/ir/opcodes.h
|
||||||
src/shader_recompiler/ir/opcodes.inc
|
src/shader_recompiler/ir/opcodes.inc
|
||||||
|
src/shader_recompiler/ir/operand_helper.h
|
||||||
src/shader_recompiler/ir/patch.cpp
|
src/shader_recompiler/ir/patch.cpp
|
||||||
src/shader_recompiler/ir/patch.h
|
src/shader_recompiler/ir/patch.h
|
||||||
src/shader_recompiler/ir/position.h
|
src/shader_recompiler/ir/position.h
|
||||||
|
|||||||
@@ -179,6 +179,14 @@ void Translator::EmitPrologue(IR::Block* first_block) {
|
|||||||
// [8:12]: output control point id
|
// [8:12]: output control point id
|
||||||
ir.SetVectorReg(IR::VectorReg::V1,
|
ir.SetVectorReg(IR::VectorReg::V1,
|
||||||
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
|
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
|
||||||
|
|
||||||
|
if (runtime_info.hs_info.offchip_lds_enable) {
|
||||||
|
// No off-chip tessellation has been observed yet. If this survives dead code elim,
|
||||||
|
// revisit
|
||||||
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::OffChipLdsBase));
|
||||||
|
}
|
||||||
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::TessFactorsBufferBase));
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LogicalStage::TessellationEval:
|
case LogicalStage::TessellationEval:
|
||||||
|
|||||||
@@ -202,39 +202,18 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||||||
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
|
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
|
||||||
const GcnInst& inst, u32 scalar_width, bool is_signed) {
|
const GcnInst& inst, u32 scalar_width, bool is_signed) {
|
||||||
const auto& mubuf = inst.control.mubuf;
|
const auto& mubuf = inst.control.mubuf;
|
||||||
const bool is_ring = mubuf.glc && mubuf.slc && info.l_stage != LogicalStage::Vertex &&
|
|
||||||
info.l_stage != LogicalStage::Fragment;
|
|
||||||
const IR::VectorReg vaddr{inst.src[0].code};
|
const IR::VectorReg vaddr{inst.src[0].code};
|
||||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
|
||||||
const bool has_soffset = !soffset.IsImmediate() || soffset.U32() != 0;
|
|
||||||
if (info.stage != Stage::Geometry) {
|
|
||||||
ASSERT_MSG(!has_soffset || !mubuf.offen,
|
|
||||||
"Having both scalar and vector offsets is not supported");
|
|
||||||
}
|
|
||||||
|
|
||||||
const IR::Value address = [&] -> IR::Value {
|
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
|
||||||
if (is_ring) {
|
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
|
||||||
}
|
const IR::U32 soffset{GetSrc(inst.src[3])};
|
||||||
if (mubuf.idxen && mubuf.offen) {
|
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
|
||||||
}
|
|
||||||
if (mubuf.idxen && has_soffset) {
|
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
|
||||||
}
|
|
||||||
if (mubuf.idxen || mubuf.offen) {
|
|
||||||
return ir.GetVectorReg(vaddr);
|
|
||||||
}
|
|
||||||
if (has_soffset) {
|
|
||||||
return soffset;
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}();
|
|
||||||
|
|
||||||
IR::BufferInstInfo buffer_info{};
|
IR::BufferInstInfo buffer_info{};
|
||||||
buffer_info.index_enable.Assign(mubuf.idxen);
|
buffer_info.index_enable.Assign(mubuf.idxen);
|
||||||
buffer_info.offset_enable.Assign(mubuf.offen || has_soffset);
|
buffer_info.voffset_enable.Assign(mubuf.offen);
|
||||||
buffer_info.inst_offset.Assign(mubuf.offset);
|
buffer_info.inst_offset.Assign(mubuf.offset);
|
||||||
buffer_info.globally_coherent.Assign(mubuf.glc);
|
buffer_info.globally_coherent.Assign(mubuf.glc);
|
||||||
buffer_info.system_coherent.Assign(mubuf.slc);
|
buffer_info.system_coherent.Assign(mubuf.slc);
|
||||||
@@ -290,35 +269,18 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
|
|||||||
void Translator::BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
|
void Translator::BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
|
||||||
const GcnInst& inst, u32 scalar_width) {
|
const GcnInst& inst, u32 scalar_width) {
|
||||||
const auto& mubuf = inst.control.mubuf;
|
const auto& mubuf = inst.control.mubuf;
|
||||||
const bool is_ring =
|
|
||||||
mubuf.glc && mubuf.slc && info.l_stage != LogicalStage::Fragment &&
|
|
||||||
info.stage !=
|
|
||||||
Stage::Vertex; // VS passes attributes down with EXPORT, VS HW stage is always present
|
|
||||||
const IR::VectorReg vaddr{inst.src[0].code};
|
const IR::VectorReg vaddr{inst.src[0].code};
|
||||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
|
||||||
|
|
||||||
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
|
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
|
||||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
|
||||||
"Non immediate offset not supported");
|
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
|
||||||
}
|
const IR::U32 soffset{GetSrc(inst.src[3])};
|
||||||
|
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
|
||||||
IR::Value address = [&] -> IR::Value {
|
|
||||||
if (is_ring) {
|
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
|
||||||
}
|
|
||||||
if (mubuf.idxen && mubuf.offen) {
|
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
|
||||||
}
|
|
||||||
if (mubuf.idxen || mubuf.offen) {
|
|
||||||
return ir.GetVectorReg(vaddr);
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}();
|
|
||||||
|
|
||||||
IR::BufferInstInfo buffer_info{};
|
IR::BufferInstInfo buffer_info{};
|
||||||
buffer_info.index_enable.Assign(mubuf.idxen);
|
buffer_info.index_enable.Assign(mubuf.idxen);
|
||||||
buffer_info.offset_enable.Assign(mubuf.offen);
|
buffer_info.voffset_enable.Assign(mubuf.offen);
|
||||||
buffer_info.inst_offset.Assign(mubuf.offset);
|
buffer_info.inst_offset.Assign(mubuf.offset);
|
||||||
buffer_info.globally_coherent.Assign(mubuf.glc);
|
buffer_info.globally_coherent.Assign(mubuf.glc);
|
||||||
buffer_info.system_coherent.Assign(mubuf.slc);
|
buffer_info.system_coherent.Assign(mubuf.slc);
|
||||||
@@ -377,21 +339,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||||||
const IR::VectorReg vaddr{inst.src[0].code};
|
const IR::VectorReg vaddr{inst.src[0].code};
|
||||||
const IR::VectorReg vdata{inst.src[1].code};
|
const IR::VectorReg vdata{inst.src[1].code};
|
||||||
const IR::ScalarReg srsrc{inst.src[2].code * 4};
|
const IR::ScalarReg srsrc{inst.src[2].code * 4};
|
||||||
const IR::Value address = [&] -> IR::Value {
|
const IR::U32 index = mubuf.idxen ? ir.GetVectorReg(vaddr) : ir.Imm32(0);
|
||||||
if (mubuf.idxen && mubuf.offen) {
|
const IR::VectorReg voffset_vgpr = mubuf.idxen ? vaddr + 1 : vaddr;
|
||||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
const IR::U32 voffset = mubuf.offen ? ir.GetVectorReg(voffset_vgpr) : ir.Imm32(0);
|
||||||
}
|
|
||||||
if (mubuf.idxen || mubuf.offen) {
|
|
||||||
return ir.GetVectorReg(vaddr);
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}();
|
|
||||||
const IR::U32 soffset{GetSrc(inst.src[3])};
|
const IR::U32 soffset{GetSrc(inst.src[3])};
|
||||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
const IR::Value address = ir.CompositeConstruct(index, voffset, soffset);
|
||||||
|
|
||||||
IR::BufferInstInfo buffer_info{};
|
IR::BufferInstInfo buffer_info{};
|
||||||
buffer_info.index_enable.Assign(mubuf.idxen);
|
buffer_info.index_enable.Assign(mubuf.idxen);
|
||||||
buffer_info.offset_enable.Assign(mubuf.offen);
|
buffer_info.voffset_enable.Assign(mubuf.offen);
|
||||||
buffer_info.inst_offset.Assign(mubuf.offset);
|
buffer_info.inst_offset.Assign(mubuf.offset);
|
||||||
buffer_info.globally_coherent.Assign(mubuf.glc);
|
buffer_info.globally_coherent.Assign(mubuf.glc);
|
||||||
buffer_info.system_coherent.Assign(mubuf.slc);
|
buffer_info.system_coherent.Assign(mubuf.slc);
|
||||||
|
|||||||
@@ -153,7 +153,11 @@ std::string NameOf(Attribute attribute) {
|
|||||||
case Attribute::TessellationEvaluationPointV:
|
case Attribute::TessellationEvaluationPointV:
|
||||||
return "TessellationEvaluationPointV";
|
return "TessellationEvaluationPointV";
|
||||||
// Hull shader inputs. Each label returns the spelling of its own enumerator; the strings for
// PackedHullInvocationInfo and OffChipLdsBase were previously swapped.
case Attribute::PackedHullInvocationInfo:
    return "PackedHullInvocationInfo";
case Attribute::OffChipLdsBase:
    return "OffChipLdsBase";
case Attribute::TessFactorsBufferBase:
    return "TessFactorsBufferBase";
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,6 +85,8 @@ enum class Attribute : u64 {
|
|||||||
TessellationEvaluationPointU = 88,
|
TessellationEvaluationPointU = 88,
|
||||||
TessellationEvaluationPointV = 89,
|
TessellationEvaluationPointV = 89,
|
||||||
PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID
|
PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID
|
||||||
|
OffChipLdsBase = 91,
|
||||||
|
TessFactorsBufferBase = 92,
|
||||||
Max,
|
Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
48
src/shader_recompiler/ir/operand_helper.h
Normal file
48
src/shader_recompiler/ir/operand_helper.h
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
// Some helpers to get operand indices of instructions by name to make it a bit safer.
|
||||||
|
// Just a start, not widely used
|
||||||
|
|
||||||
|
#pragma once

#include "shader_recompiler/ir/value.h"

namespace Shader::IR {

// Named operand indices for buffer instructions, so call sites can write
// inst.Arg(StoreBufferArgs::Data) instead of a bare number.
// Namespaces of constants are used rather than enums because enum values would need a cast to
// size_t before they could be used as indices.

namespace LoadBufferArgs {
inline constexpr size_t Handle = 0;
inline constexpr size_t Address = 1;
} // namespace LoadBufferArgs

namespace StoreBufferArgs {
inline constexpr size_t Handle = 0;
inline constexpr size_t Address = 1;
inline constexpr size_t Data = 2;
} // namespace StoreBufferArgs

// The accessors below read the address operand through LoadBufferArgs::Address for loads and
// stores alike, which is only valid while both operand layouts agree.
static_assert(LoadBufferArgs::Handle == StoreBufferArgs::Handle);
static_assert(LoadBufferArgs::Address == StoreBufferArgs::Address);

// Get certain components of buffer address argument, used in Load/StoreBuffer variants.
// We keep components separate as u32x3, before combining after sharp tracking.
//
// buffer_inst: instruction whose address operand is read.
// comp:        component to fetch: 0 = index, 1 = vector offset, 2 = scalar offset.
// Returns the resolved component as a U32. Asserts if comp is out of range or if the address
// operand is not a CompositeConstructU32x3.
static inline IR::U32 GetBufferAddressComponent(const Inst* buffer_inst, u32 comp) {
    // The address is a u32x3, so only components 0..2 exist.
    ASSERT(comp < 3);
    Inst* address = buffer_inst->Arg(LoadBufferArgs::Address).InstRecursive();
    ASSERT(address->GetOpcode() == IR::Opcode::CompositeConstructU32x3);
    return IR::U32{address->Arg(comp).Resolve()};
}

// Component 0 of the buffer address: the index.
static inline U32 GetBufferIndexArg(const Inst* buffer_inst) {
    return GetBufferAddressComponent(buffer_inst, 0);
}

// Component 1 of the buffer address: the vector offset.
static inline U32 GetBufferVOffsetArg(const Inst* buffer_inst) {
    return GetBufferAddressComponent(buffer_inst, 1);
}

// Component 2 of the buffer address: the scalar offset.
static inline U32 GetBufferSOffsetArg(const Inst* buffer_inst) {
    return GetBufferAddressComponent(buffer_inst, 2);
}

} // namespace Shader::IR
|
||||||
@@ -7,6 +7,7 @@
|
|||||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
|
#include "shader_recompiler/ir/operand_helper.h"
|
||||||
#include "shader_recompiler/ir/passes/ir_passes.h"
|
#include "shader_recompiler/ir/passes/ir_passes.h"
|
||||||
#include "shader_recompiler/ir/pattern_matching.h"
|
#include "shader_recompiler/ir/pattern_matching.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
@@ -373,11 +374,27 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
case IR::Opcode::StoreBufferU32x2:
|
case IR::Opcode::StoreBufferU32x2:
|
||||||
case IR::Opcode::StoreBufferU32x3:
|
case IR::Opcode::StoreBufferU32x3:
|
||||||
case IR::Opcode::StoreBufferU32x4: {
|
case IR::Opcode::StoreBufferU32x4: {
|
||||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
IR::Value soffset = IR::GetBufferSOffsetArg(&inst);
|
||||||
if (!info.globally_coherent) {
|
if (!M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::TessFactorsBufferBase),
|
||||||
|
MatchIgnore())
|
||||||
|
.Match(soffset)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
|
||||||
|
IR::Value voffset;
|
||||||
|
bool success =
|
||||||
|
M_COMPOSITECONSTRUCTU32X3(MatchU32(0), MatchImm(voffset), MatchIgnore())
|
||||||
|
.Match(inst.Arg(IR::StoreBufferArgs::Address));
|
||||||
|
ASSERT_MSG(success, "unhandled pattern in tess factor store");
|
||||||
|
|
||||||
|
const u32 gcn_factor_idx = (info.inst_offset.Value() + voffset.U32()) >> 2;
|
||||||
|
const IR::Value data = inst.Arg(IR::StoreBufferArgs::Data);
|
||||||
|
|
||||||
|
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
|
||||||
|
|
||||||
const auto GetValue = [&](IR::Value data) -> IR::F32 {
|
const auto GetValue = [&](IR::Value data) -> IR::F32 {
|
||||||
if (auto* inst = data.TryInstRecursive();
|
if (auto* inst = data.TryInstRecursive();
|
||||||
inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
|
inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
|
||||||
@@ -385,12 +402,7 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
|||||||
}
|
}
|
||||||
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
|
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
|
||||||
};
|
};
|
||||||
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
|
|
||||||
IR::U32 index = IR::U32{inst.Arg(1)};
|
|
||||||
ASSERT(index.IsImmediate());
|
|
||||||
const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2;
|
|
||||||
|
|
||||||
const IR::Value data = inst.Arg(2);
|
|
||||||
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
|
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
|
||||||
// The hull outputs tess factors in different formats depending on the shader.
|
// The hull outputs tess factors in different formats depending on the shader.
|
||||||
// For triangle domains, it seems to pack the entries into 4 consecutive floats,
|
// For triangle domains, it seems to pack the entries into 4 consecutive floats,
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/ir/operand_helper.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/ir/reinterpret.h"
|
#include "shader_recompiler/ir/reinterpret.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
@@ -740,22 +741,25 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
|
|||||||
: buffer.GetDataFmt();
|
: buffer.GetDataFmt();
|
||||||
const u32 shift = BufferAddressShift(inst, data_format);
|
const u32 shift = BufferAddressShift(inst, data_format);
|
||||||
const u32 mask = (1 << shift) - 1;
|
const u32 mask = (1 << shift) - 1;
|
||||||
|
const IR::U32 soffset = IR::GetBufferSOffsetArg(&inst);
|
||||||
|
|
||||||
// If address calculation is of the form "index * const_stride + offset" with offset constant
|
// If address calculation is of the form "index * const_stride + offset" with offset constant
|
||||||
// and both const_stride and offset are divisible with the element size, apply shift directly.
|
// and both const_stride and offset are divisible with the element size, apply shift directly.
|
||||||
if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable &&
|
if (inst_info.index_enable && !inst_info.voffset_enable && soffset.IsImmediate() &&
|
||||||
!buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) {
|
!buffer.swizzle_enable && !buffer.add_tid_enable && (stride & mask) == 0) {
|
||||||
// buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift)
|
const u32 total_offset = soffset.U32() + inst_offset;
|
||||||
const IR::U32 index = IR::U32{inst.Arg(1)};
|
if ((total_offset & mask) == 0) {
|
||||||
return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift));
|
// buffer_offset = index * (const_stride >> shift) + (offset >> shift)
|
||||||
|
const IR::U32 index = IR::GetBufferIndexArg(&inst);
|
||||||
|
return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)),
|
||||||
|
ir.Imm32(total_offset >> shift));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
|
// index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
|
||||||
IR::U32 index = ir.Imm32(0U);
|
IR::U32 index = ir.Imm32(0U);
|
||||||
if (inst_info.index_enable) {
|
if (inst_info.index_enable) {
|
||||||
const IR::U32 vgpr_index{inst_info.offset_enable
|
const IR::U32 vgpr_index = IR::GetBufferIndexArg(&inst);
|
||||||
? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)}
|
|
||||||
: IR::U32{inst.Arg(1)}};
|
|
||||||
index = ir.IAdd(index, vgpr_index);
|
index = ir.IAdd(index, vgpr_index);
|
||||||
}
|
}
|
||||||
if (buffer.add_tid_enable) {
|
if (buffer.add_tid_enable) {
|
||||||
@@ -766,11 +770,10 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
|
|||||||
}
|
}
|
||||||
// offset = (inst_offen ? vgpr_offset : 0) + inst_offset
|
// offset = (inst_offen ? vgpr_offset : 0) + sgpr_offset + inst_offset
|
||||||
IR::U32 offset = ir.Imm32(inst_offset);
|
IR::U32 offset = ir.Imm32(inst_offset);
|
||||||
if (inst_info.offset_enable) {
|
offset = ir.IAdd(offset, soffset);
|
||||||
const IR::U32 vgpr_offset = inst_info.index_enable
|
if (inst_info.voffset_enable) {
|
||||||
? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)}
|
const IR::U32 voffset = IR::GetBufferVOffsetArg(&inst);
|
||||||
: IR::U32{inst.Arg(1)};
|
offset = ir.IAdd(offset, voffset);
|
||||||
offset = ir.IAdd(offset, vgpr_offset);
|
|
||||||
}
|
}
|
||||||
const IR::U32 const_stride = ir.Imm32(stride);
|
const IR::U32 const_stride = ir.Imm32(stride);
|
||||||
IR::U32 buffer_offset;
|
IR::U32 buffer_offset;
|
||||||
@@ -815,7 +818,8 @@ void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
inst.SetArg(1, CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
|
inst.SetArg(IR::LoadBufferArgs::Address,
|
||||||
|
CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR::Value& x,
|
IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR::Value& x,
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
|
#include "shader_recompiler/ir/operand_helper.h"
|
||||||
#include "shader_recompiler/ir/position.h"
|
#include "shader_recompiler/ir/position.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
@@ -113,10 +114,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
const auto vertex_id = (info.index_enable ? IR::GetBufferIndexArg(&inst)
|
||||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
: IR::GetBufferVOffsetArg(&inst))
|
||||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
.U32() >>
|
||||||
const auto bucket = offset.Resolve().U32() / 256u;
|
2;
|
||||||
|
const auto soffset = IR::GetBufferSOffsetArg(&inst);
|
||||||
|
const auto bucket = soffset.Resolve().U32() / 256u;
|
||||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||||
: IR::Attribute::Param0 + (bucket / 4 - 1);
|
: IR::Attribute::Param0 + (bucket / 4 - 1);
|
||||||
const auto comp = bucket % 4;
|
const auto comp = bucket % 4;
|
||||||
|
|||||||
@@ -121,6 +121,8 @@ inline auto MakeInstPattern(Args&&... args) {
|
|||||||
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
|
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
|
||||||
#define M_COMPOSITECONSTRUCTU32X2(...) \
|
#define M_COMPOSITECONSTRUCTU32X2(...) \
|
||||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
|
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
|
||||||
|
#define M_COMPOSITECONSTRUCTU32X3(...) \
|
||||||
|
MakeInstPattern<IR::Opcode::CompositeConstructU32x3>(__VA_ARGS__)
|
||||||
#define M_COMPOSITECONSTRUCTU32X4(...) \
|
#define M_COMPOSITECONSTRUCTU32X4(...) \
|
||||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
||||||
|
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ union TextureInstInfo {
|
|||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 1, u32> index_enable;
|
BitField<0, 1, u32> index_enable;
|
||||||
BitField<1, 1, u32> offset_enable;
|
BitField<1, 1, u32> voffset_enable;
|
||||||
BitField<2, 12, u32> inst_offset;
|
BitField<2, 12, u32> inst_offset;
|
||||||
BitField<14, 1, u32> system_coherent;
|
BitField<14, 1, u32> system_coherent;
|
||||||
BitField<15, 1, u32> globally_coherent;
|
BitField<15, 1, u32> globally_coherent;
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ struct HullRuntimeInfo {
|
|||||||
u32 num_input_control_points;
|
u32 num_input_control_points;
|
||||||
u32 num_threads;
|
u32 num_threads;
|
||||||
AmdGpu::TessellationType tess_type;
|
AmdGpu::TessellationType tess_type;
|
||||||
|
bool offchip_lds_enable;
|
||||||
|
|
||||||
// from tess constants buffer
|
// from tess constants buffer
|
||||||
u32 ls_stride;
|
u32 ls_stride;
|
||||||
|
|||||||
@@ -118,6 +118,7 @@ struct Liverpool {
|
|||||||
u32 address_lo;
|
u32 address_lo;
|
||||||
BitField<0, 8, u32> address_hi;
|
BitField<0, 8, u32> address_hi;
|
||||||
union {
|
union {
|
||||||
|
// SPI_SHADER_PGM_RSRC1_XX
|
||||||
BitField<0, 6, u64> num_vgprs;
|
BitField<0, 6, u64> num_vgprs;
|
||||||
BitField<6, 4, u64> num_sgprs;
|
BitField<6, 4, u64> num_sgprs;
|
||||||
BitField<10, 2, u64> priority;
|
BitField<10, 2, u64> priority;
|
||||||
@@ -127,7 +128,12 @@ struct Liverpool {
|
|||||||
BitField<18, 2, FpDenormMode> fp_denorm_mode64;
|
BitField<18, 2, FpDenormMode> fp_denorm_mode64;
|
||||||
BitField<12, 8, u64> float_mode;
|
BitField<12, 8, u64> float_mode;
|
||||||
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
||||||
|
// SPI_SHADER_PGM_RSRC2_XX
|
||||||
|
BitField<32, 1, u64> scratch_en;
|
||||||
BitField<33, 5, u64> num_user_regs;
|
BitField<33, 5, u64> num_user_regs;
|
||||||
|
union {
|
||||||
|
BitField<39, 1, u64> oc_lds_en;
|
||||||
|
} rsrc2_hs;
|
||||||
} settings;
|
} settings;
|
||||||
UserData user_data;
|
UserData user_data;
|
||||||
|
|
||||||
|
|||||||
@@ -112,6 +112,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
|
||||||
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
|
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
|
||||||
info.hs_info.tess_type = regs.tess_config.type;
|
info.hs_info.tess_type = regs.tess_config.type;
|
||||||
|
info.hs_info.offchip_lds_enable = regs.hs_program.settings.rsrc2_hs.oc_lds_en.Value();
|
||||||
|
|
||||||
// We need to initialize most hs_info fields after finding the V# with tess constants
|
// We need to initialize most hs_info fields after finding the V# with tess constants
|
||||||
break;
|
break;
|
||||||
|
|||||||
Reference in New Issue
Block a user