Merge branch 'main' into microphone

This commit is contained in:
georgemoralis 2025-07-14 22:14:19 +03:00 committed by GitHub
commit e0eabac115
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 574 additions and 364 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 6b450704f6fedb9413d0c89a9eb59d028eb1e6c0
Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e

View File

@ -52,7 +52,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
if (ctx.stage == Stage::Local) {
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
ctx.ConstU32(element));
@ -94,13 +94,9 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
return {ctx.F32[1], false};
} else {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
}
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
}
if (IR::IsMrt(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
@ -120,6 +116,9 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
}
} // Anonymous namespace
using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
const u32 half = PushData::UdRegsIndex + (index >> 2);
@ -131,41 +130,6 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
return ud_reg;
}
// Handlers for opcodes that are not expected to reach SPIR-V emission. Each one triggers
// UNREACHABLE_MSG("Unreachable instruction") if it is ever invoked. The context parameter is
// left unnamed throughout because none of these handlers uses it.
void EmitGetThreadBitScalarReg(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetThreadBitScalarReg(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVectorRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVectorRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
if (!Config::directMemoryAccess()) {
@ -180,39 +144,27 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
}
}
template <PointerType type>
Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
index = ctx.OpIAdd(ctx.U32[1], index, offset);
}
const auto [id, pointer_type] = buffer.Alias(type);
const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(value_type, ptr)};
const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
}
return result;
}
// Non-template entry point: forwards to ReadConstBuffer instantiated for PointerType::U32,
// passing the buffer handle and index through unchanged.
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
return ReadConstBuffer<PointerType::U32>(ctx, handle, index);
}
// Loads one of the two step-rate members from the push-constant block as a U32.
// A rate_idx of 0 selects PushData::Step0Index; any other value selects PushData::Step1Index.
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
    const auto member_index = (rate_idx == 0) ? PushData::Step0Index : PushData::Step1Index;
    const auto member_ptr_type = ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]);
    const auto member_ptr =
        ctx.OpAccessChain(member_ptr_type, ctx.push_data_block, ctx.ConstU32(member_index));
    return ctx.OpLoad(ctx.U32[1], member_ptr);
}
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -222,7 +174,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -230,7 +182,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.l_stage == LogicalStage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
@ -248,18 +200,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (IR::IsParam(attr)) {
const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(param_index)};
if (param.buffer_handle >= 0) {
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return ReadConstBuffer<PointerType::F32>(ctx, param.buffer_handle, offset);
}
Id result;
if (param.is_loaded) {
// Attribute is either default or manually interpolated. The id points to an already
@ -305,10 +245,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
case IR::Attribute::InstanceId:
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
case IR::Attribute::InstanceId0:
return EmitReadStepRate(ctx, 0);
case IR::Attribute::InstanceId1:
return EmitReadStepRate(ctx, 1);
case IR::Attribute::WorkgroupIndex:
return ctx.workgroup_index_id;
case IR::Attribute::WorkgroupId:
@ -640,4 +576,36 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
UNREACHABLE_MSG("SPIR-V instruction");
}
// Handlers for opcodes that are not expected to reach SPIR-V emission. Each one triggers
// UNREACHABLE_MSG("Unreachable instruction") if it is ever invoked. The context parameter is
// left unnamed throughout because none of these handlers uses it.
void EmitGetThreadBitScalarReg(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetThreadBitScalarReg(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetScalarRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetVectorRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetVectorRegister(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitSetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}

void EmitGetGotoVariable(EmitContext&) {
    UNREACHABLE_MSG("Unreachable instruction");
}
} // namespace Shader::Backend::SPIRV

View File

@ -108,7 +108,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
@ -531,6 +531,8 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value);
Id EmitReadLane(EmitContext& ctx, Id value, Id lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitBallot(EmitContext& ctx, Id bit);
Id EmitBallotFindLsb(EmitContext& ctx, Id mask);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);

View File

@ -34,4 +34,12 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
return ctx.u32_zero_value;
}
// Emits OpGroupNonUniformBallot for `bit` at the scope returned by SubgroupScope(ctx).
// The result type is the four-component U32 vector.
Id EmitBallot(EmitContext& ctx, Id bit) {
    const auto ballot_type = ctx.U32[4];
    const auto scope = SubgroupScope(ctx);
    return ctx.OpGroupNonUniformBallot(ballot_type, scope, bit);
}
// Emits OpGroupNonUniformBallotFindLSB over the ballot `mask` at the scope returned by
// SubgroupScope(ctx). The result type is a scalar U32.
Id EmitBallotFindLsb(EmitContext& ctx, Id mask) {
    const auto index_type = ctx.U32[1];
    const auto scope = SubgroupScope(ctx);
    return ctx.OpGroupNonUniformBallotFindLSB(index_type, scope, mask);
}
} // namespace Shader::Backend::SPIRV

View File

@ -377,35 +377,13 @@ void EmitContext::DefineInputs() {
ASSERT(attrib.semantic < IR::NumParams);
const auto sharp = attrib.GetSharp(info);
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
if (attrib.UsesStepRates()) {
const u32 rate_idx =
attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
: 1;
const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
const auto buffer =
std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
return buffer.instance_attrib == attrib.semantic;
});
// Note that we pass index rather than Id
input_params[attrib.semantic] = SpirvAttribute{
.id = {rate_idx},
.pointer_type = input_u32,
.component_type = U32[1],
.num_components = std::min<u16>(attrib.num_elements, num_components),
.is_integer = true,
.is_loaded = false,
.buffer_handle = int(buffer - info.buffers.begin()),
};
Id id{DefineInput(type, attrib.semantic)};
if (attrib.GetStepRate() != Gcn::VertexAttribute::InstanceIdType::None) {
Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
} else {
Id id{DefineInput(type, attrib.semantic)};
if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
} else {
Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
}
input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
}
input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
}
break;
}
@ -573,7 +551,7 @@ void EmitContext::DefineOutputs() {
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
if (stage == Stage::Local) {
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
if (num_attrs > 0) {
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
@ -700,12 +678,10 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
const Id struct_type{Name(TypeStruct(F32[1], F32[1], F32[1], F32[1], U32[4], U32[4], U32[4],
U32[4], U32[4], U32[4], U32[2]),
"AuxData")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, PushData::Step0Index, "sr0");
MemberName(struct_type, PushData::Step1Index, "sr1");
MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
MemberName(struct_type, PushData::XScaleIndex, "xscale");
@ -717,19 +693,17 @@ void EmitContext::DefinePushDataBlock() {
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 12U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 16U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 32U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 48U);
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 64U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 80U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 96U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 112U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data");
interfaces.push_back(push_data_block);
@ -763,19 +737,19 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
Decorate(id, spv::Decoration::NonWritable);
}
switch (buffer_type) {
case Shader::BufferType::GdsBuffer:
case BufferType::GdsBuffer:
Name(id, "gds_buffer");
break;
case Shader::BufferType::Flatbuf:
case BufferType::Flatbuf:
Name(id, "srt_flatbuf");
break;
case Shader::BufferType::BdaPagetable:
case BufferType::BdaPagetable:
Name(id, "bda_pagetable");
break;
case Shader::BufferType::FaultBuffer:
case BufferType::FaultBuffer:
Name(id, "fault_buffer");
break;
case Shader::BufferType::SharedMemory:
case BufferType::SharedMemory:
Name(id, "ssbo_shmem");
break;
default:

View File

@ -361,7 +361,6 @@ public:
u32 num_components;
bool is_integer{};
bool is_loaded{};
s32 buffer_handle{-1};
};
Id input_attr_array;
Id output_attr_array;

View File

@ -3,7 +3,6 @@
#pragma once
#include <ranges>
#include <vector>
#include "common/types.h"
#include "shader_recompiler/info.h"
@ -29,11 +28,6 @@ struct VertexAttribute {
return static_cast<InstanceIdType>(instance_data);
}
/// True when this attribute's step rate is OverStepRate0 or OverStepRate1.
[[nodiscard]] bool UsesStepRates() const {
    switch (GetStepRate()) {
    case OverStepRate0:
    case OverStepRate1:
        return true;
    default:
        return false;
    }
}
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
@ -52,12 +46,6 @@ struct FetchShaderData {
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
[[nodiscard]] bool UsesStepRates() const {
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
return attribute.UsesStepRates();
}) != attributes.end();
}
bool operator==(const FetchShaderData& other) const {
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
instance_offset_sgpr == other.instance_offset_sgpr;

View File

@ -680,8 +680,9 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) {
}
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U32 result{ir.FindILsb(src0)};
ASSERT(inst.src[0].field == OperandField::ScalarGPR);
const IR::U32 result{
ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))};
SetDst(inst.dst[0], result);
}

View File

@ -90,17 +90,40 @@ void Translator::EmitPrologue(IR::Block* first_block) {
case LogicalStage::Vertex:
// v0: vertex ID, always present
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
// v1: instance ID, step rate 0
if (runtime_info.num_input_vgprs > 0) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
}
// v2: instance ID, step rate 1
if (runtime_info.num_input_vgprs > 1) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
}
// v3: instance ID, plain
if (runtime_info.num_input_vgprs > 2) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
if (info.stage == Stage::Local) {
// v1: rel patch ID
if (runtime_info.num_input_vgprs > 0) {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
}
// v2: instance ID
if (runtime_info.num_input_vgprs > 1) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
}
} else {
// v1: instance ID, step rate 0
if (runtime_info.num_input_vgprs > 0) {
if (runtime_info.vs_info.step_rate_0 != 0) {
ir.SetVectorReg(dst_vreg++,
ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
ir.Imm32(runtime_info.vs_info.step_rate_0)));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
}
}
// v2: instance ID, step rate 1
if (runtime_info.num_input_vgprs > 1) {
if (runtime_info.vs_info.step_rate_1 != 0) {
ir.SetVectorReg(dst_vreg++,
ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
ir.Imm32(runtime_info.vs_info.step_rate_1)));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
}
}
// v3: instance ID, plain
if (runtime_info.num_input_vgprs > 2) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
}
}
break;
case LogicalStage::Fragment:
@ -183,10 +206,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
switch (runtime_info.gs_info.out_primitive[0]) {
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
[[fallthrough]];
case AmdGpu::GsOutputPrimitiveType::LineStrip:
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
[[fallthrough]];
default:
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
break;
@ -481,11 +502,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
}
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const auto code_sgpr_base = inst.src[0].code;
// The fetch shader must be inlined to access as regular buffers, so that
// bounds checks can be emitted to emulate robust buffer access.
if (!profile.supports_robust_buffer_access) {
// The fetch shader must be inlined to access as regular buffers, so that
// bounds checks can be emitted to emulate robust buffer access.
const auto* code = GetFetchShaderCode(info, code_sgpr_base);
GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
@ -535,16 +556,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
for (u32 i = 0; i < 4; i++) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
}
// In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers
if (attrib.UsesStepRates()) {
info.buffers.push_back({
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32,
.instance_attrib = attrib.semantic,
});
}
}
}

View File

@ -113,17 +113,13 @@ struct FMaskResource {
using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
struct PushData {
static constexpr u32 Step0Index = 0;
static constexpr u32 Step1Index = 1;
static constexpr u32 XOffsetIndex = 2;
static constexpr u32 YOffsetIndex = 3;
static constexpr u32 XScaleIndex = 4;
static constexpr u32 YScaleIndex = 5;
static constexpr u32 UdRegsIndex = 6;
static constexpr u32 XOffsetIndex = 0;
static constexpr u32 YOffsetIndex = 1;
static constexpr u32 XScaleIndex = 2;
static constexpr u32 YScaleIndex = 3;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
u32 step0;
u32 step1;
float xoffset;
float yoffset;
float xscale;

View File

@ -100,22 +100,36 @@ std::string NameOf(Attribute attribute) {
return "Param30";
case Attribute::Param31:
return "Param31";
case Attribute::ClipDistance:
return "ClipDistanace";
case Attribute::CullDistance:
return "CullDistance";
case Attribute::RenderTargetId:
return "RenderTargetId";
case Attribute::ViewportId:
return "ViewportId";
case Attribute::VertexId:
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::PrimitiveId:
return "PrimitiveId";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::IsFrontFace:
return "IsFrontFace";
case Attribute::SampleIndex:
return "SampleIndex";
case Attribute::GlobalInvocationId:
return "GlobalInvocationId";
case Attribute::WorkgroupId:
return "WorkgroupId";
case Attribute::WorkgroupIndex:
return "WorkgroupIndex";
case Attribute::LocalInvocationId:
return "LocalInvocationId";
case Attribute::LocalInvocationIndex:
return "LocalInvocationIndex";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::InvocationId:
return "InvocationId";
case Attribute::PatchVertices:

View File

@ -73,8 +73,6 @@ enum class Attribute : u64 {
LocalInvocationId = 76,
LocalInvocationIndex = 77,
FragCoord = 78,
InstanceId0 = 79, // step rate 0
InstanceId1 = 80, // step rate 1
InvocationId = 81, // TCS id in output patch and instanced geometry shader id
PatchVertices = 82,
TessellationEvaluationPointU = 83,

View File

@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
@ -660,6 +660,14 @@ U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& la
return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
}
// Appends a Ballot instruction taking the boolean `bit` and returns its result value.
// The opcode table declares Ballot as producing U32x4 from a U1 operand.
Value IREmitter::Ballot(const U1& bit) {
return Inst(Opcode::Ballot, bit);
}
// Appends a BallotFindLsb instruction over `mask` and returns its U32 result.
// The opcode table declares BallotFindLsb as taking a U32x4 operand, i.e. a Ballot result.
U32 IREmitter::BallotFindLsb(const Value& mask) {
return Inst<U32>(Opcode::BallotFindLsb, mask);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

View File

@ -81,8 +81,7 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
IR::Value index = IR::Value(u32(0u)));
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
@ -176,6 +175,8 @@ public:
[[nodiscard]] U32 ReadFirstLane(const U32& value);
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
[[nodiscard]] Value Ballot(const U1& bit);
[[nodiscard]] U32 BallotFindLsb(const Value& mask);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View File

@ -472,5 +472,7 @@ OPCODE(QuadShuffle, U32, U32,
OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 )
OPCODE(Ballot, U32x4, U1, )
OPCODE(BallotFindLsb, U32, U32x4, )
OPCODE(DataAppend, U32, U32, U32 )
OPCODE(DataConsume, U32, U32, U32 )

View File

@ -191,7 +191,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
Xbyak::CodeGenerator& c = g_srt_codegen;
if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
if (pass_info.srt_roots.empty()) {
return;
}
@ -205,29 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
}
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
pass_info.dst_off_dw = NumUserDataRegs;
// Special case for V# step rate buffers in fetch shader
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
// get pointer to V#
if (sgpr_base != IR::NumScalarRegs) {
PushPtr(c, sgpr_base);
}
u32 src_off = dword_offset << 2;
for (auto j = 0; j < num_dwords; j++) {
c.mov(r11d, ptr[rdi + src_off]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
src_off += 4;
++pass_info.dst_off_dw;
}
if (sgpr_base != IR::NumScalarRegs) {
PopPtr(c);
}
}
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {

View File

@ -95,17 +95,7 @@ void ReadLaneEliminationPass(IR::Program& program) {
if (inst.GetOpcode() != IR::Opcode::ReadLane) {
continue;
}
// Check for the following pattern and replace it with ReadFirstLane
// s_ff1_i32_b64 sgpr, exec
// v_readlane_b32 sdst, vgpr, sgpr
if (const auto lane = inst.Arg(1); !lane.IsImmediate()) {
if (lane.InstRecursive()->GetOpcode() == IR::Opcode::FindILsb64) {
const auto value = inst.Arg(0);
inst.ReplaceOpcode(IR::Opcode::ReadFirstLane);
inst.ClearArgs();
inst.SetArg(0, value);
}
if (!inst.Arg(1).IsImmediate()) {
continue;
}

View File

@ -33,12 +33,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
u32 offset = 0;
const auto* addr = inst.Arg(0).InstRecursive();
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
ASSERT(inst.Arg(0).IsImmediate());
u32 offset = inst.Arg(0).U32();
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
: inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) {
@ -116,7 +113,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
}
const auto shl_inst = inst.Arg(1).TryInstRecursive();
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
const auto bucket = offset.Resolve().U32() / 256u;
const auto attrib = bucket < 4 ? IR::Attribute::Position0

View File

@ -20,18 +20,7 @@ struct PersistentSrtInfo {
};
PFN_SrtWalker walker_func{};
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
// Fetch shaders are a special case: no IR is generated to read from step rate buffers, so
// their sharps never show up as GetUserData/ReadConst usage. This records a reservation for
// such a sharp ahead of time, grows the flattened buffer by `num_dwords`, and returns the
// dword offset the buffer had before growing (where the reserved sharp begins).
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
    const u32 reserved_offset_dw = flattened_bufsize_dw;
    srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
    flattened_bufsize_dw = reserved_offset_dw + num_dwords;
    return reserved_offset_dw;
}
};
} // namespace Shader
} // namespace Shader

View File

@ -42,7 +42,6 @@ constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
// Runtime information consulted when the shader stage is Stage::Local
// (reached through runtime_info.ls_info).
struct LocalRuntimeInfo {
// Output definition sizes the attribute array as AlignUp(ls_stride, 16) >> 4 entries.
u32 ls_stride;
// Tested alongside the Stage::Local check when choosing how output attributes are addressed.
// NOTE(review): presumably set when the stage feeds a tessellation control shader — confirm.
bool links_with_tcs;
// Member-wise three-way comparison, compiler generated.
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};
@ -85,6 +84,8 @@ struct VertexRuntimeInfo {
std::array<VsOutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
bool clip_disable{};
u32 step_rate_0;
u32 step_rate_1;
// Domain
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
@ -96,7 +97,8 @@ struct VertexRuntimeInfo {
clip_disable == other.clip_disable && tess_type == other.tess_type &&
tess_topology == other.tess_topology &&
tess_partitioning == other.tess_partitioning &&
hs_output_cp_stride == other.hs_output_cp_stride;
hs_output_cp_stride == other.hs_output_cp_stride &&
step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {

View File

@ -13,7 +13,7 @@
namespace Shader {
struct VsAttribSpecialization {
s32 num_components{};
u32 divisor{};
AmdGpu::NumberClass num_class{};
AmdGpu::CompMapping dst_select{};
@ -74,13 +74,13 @@ struct SamplerSpecialization {
* after the first compilation of a module.
*/
struct StageSpecialization {
static constexpr size_t MaxStageResources = 64;
static constexpr size_t MaxStageResources = 128;
const Shader::Info* info;
RuntimeInfo runtime_info;
std::bitset<MaxStageResources> bitset{};
std::optional<Gcn::FetchShaderData> fetch_shader_data{};
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
@ -94,10 +94,16 @@ struct StageSpecialization {
if (info_.stage == Stage::Vertex && fetch_shader_data) {
// Specialize shader on VS input number types to follow spec.
ForEachSharp(vs_attribs, fetch_shader_data->attributes,
[&profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.num_components = desc.UsesStepRates()
? AmdGpu::NumComponents(sharp.GetDataFmt())
: 0;
[&profile_, this](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
if (const auto step_rate = desc.GetStepRate();
step_rate != InstanceIdType::None) {
spec.divisor = step_rate == InstanceIdType::OverStepRate0
? runtime_info.vs_info.step_rate_0
: (step_rate == InstanceIdType::OverStepRate1
? runtime_info.vs_info.step_rate_1
: 1);
}
spec.num_class = profile_.support_legacy_vertex_attributes
? AmdGpu::NumberClass{}
: AmdGpu::GetNumberClass(sharp.GetNumberFmt());

View File

@ -304,6 +304,14 @@ struct Liverpool {
}
};
/// Line width held as a fixed-point integer.
struct LineControl {
    /// Raw value; Width() interprets it in units of 1/8.
    u32 width_fixed_point;

    /// Returns the width as a float by dividing the raw value by 8.
    /// The divisor is a float literal so the division stays in single precision instead of
    /// being promoted to double and implicitly narrowed on return; the value is unchanged.
    float Width() const {
        return static_cast<float>(width_fixed_point) / 8.0f;
    }
};
struct ModeControl {
s32 msaa_enable : 1;
s32 vport_scissor_enable : 1;
@ -513,9 +521,16 @@ struct Liverpool {
BitField<19, 1, ClipSpace> clip_space;
BitField<21, 1, PrimKillCond> vtx_kill_or;
BitField<22, 1, u32> dx_rasterization_kill;
BitField<23, 1, u32> dx_linear_attr_clip_enable;
BitField<24, 1, u32> dx_linear_attr_clip_enable;
BitField<26, 1, u32> zclip_near_disable;
BitField<26, 1, u32> zclip_far_disable;
BitField<27, 1, u32> zclip_far_disable;
/// Reports whether Z clipping is enabled: true only when the near and far disable bits
/// agree with each other and the near bit is clear.
bool ZclipEnable() const {
    const bool planes_agree = !(zclip_near_disable != zclip_far_disable);
    return planes_agree && !zclip_near_disable;
}
};
enum class PolygonMode : u32 {
@ -738,12 +753,7 @@ struct Liverpool {
u32 data_w;
};
struct BlendConstants {
float red;
float green;
float blue;
float alpha;
};
using BlendConstants = std::array<float, 4>;
union BlendControl {
enum class BlendFactor : u32 {
@ -796,11 +806,29 @@ struct Liverpool {
Err = 4u,
FmaskDecompress = 5u,
};
// Value type of the `rop3` bit-field of this register.
// Each enumerator names one logic operation; LiverpoolToVK::LogicOp translates every entry to
// the vk::LogicOp of the matching name. All listed values repeat the same 4-bit pattern in
// both nibbles (0x00, 0x11, ..., 0xFF); other 8-bit encodings have no enumerator here.
enum class LogicOp : u32 {
Clear = 0x00,
Nor = 0x11,
AndInverted = 0x22,
CopyInverted = 0x33,
AndReverse = 0x44,
Invert = 0x55,
Xor = 0x66,
Nand = 0x77,
And = 0x88,
Equiv = 0x99,
Noop = 0xAA,
OrInverted = 0xBB,
Copy = 0xCC,
OrReverse = 0xDD,
Or = 0xEE,
Set = 0xFF,
};
BitField<0, 1, u32> disable_dual_quad;
BitField<3, 1, u32> degamma_enable;
BitField<4, 3, OperationMode> mode;
BitField<16, 8, u32> rop3;
BitField<16, 8, LogicOp> rop3;
};
struct ColorBuffer {
@ -1369,7 +1397,9 @@ struct Liverpool {
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 6);
LineControl line_control;
INSERT_PADDING_WORDS(4);
HsTessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
GsMode vgt_gs_mode;
@ -1695,6 +1725,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);

View File

@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
}
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
const auto& regs = liverpool->regs;
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
Vulkan::VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers,
regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1);
if (instance.IsVertexInputDynamicState()) {
// Update current vertex inputs.

View File

@ -245,6 +245,46 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
}
}
/// Translates a guest colour-control logic operation into the Vulkan logic op of the same name.
/// Any value without a matching enumerator is reported through UNREACHABLE_MSG.
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) {
    using Rop = Liverpool::ColorControl::LogicOp;
    // clang-format off
    switch (logic_op) {
    case Rop::Clear:        return vk::LogicOp::eClear;
    case Rop::Nor:          return vk::LogicOp::eNor;
    case Rop::AndInverted:  return vk::LogicOp::eAndInverted;
    case Rop::CopyInverted: return vk::LogicOp::eCopyInverted;
    case Rop::AndReverse:   return vk::LogicOp::eAndReverse;
    case Rop::Invert:       return vk::LogicOp::eInvert;
    case Rop::Xor:          return vk::LogicOp::eXor;
    case Rop::Nand:         return vk::LogicOp::eNand;
    case Rop::And:          return vk::LogicOp::eAnd;
    case Rop::Equiv:        return vk::LogicOp::eEquivalent;
    case Rop::Noop:         return vk::LogicOp::eNoOp;
    case Rop::OrInverted:   return vk::LogicOp::eOrInverted;
    case Rop::Copy:         return vk::LogicOp::eCopy;
    case Rop::OrReverse:    return vk::LogicOp::eOrReverse;
    case Rop::Or:           return vk::LogicOp::eOr;
    case Rop::Set:          return vk::LogicOp::eSet;
    default:
        UNREACHABLE_MSG("Unknown logic op {}", u32(logic_op));
    }
    // clang-format on
}
// https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) {
switch (mode) {

View File

@ -34,6 +34,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp);

View File

@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline(
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
VertexInputs<AmdGpu::Buffer> guest_buffers;
if (!instance.IsVertexInputDynamicState()) {
GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
vs_info.step_rate_0, vs_info.step_rate_1);
}
const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
.vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
.pVertexBindingDivisors = divisors.data(),
};
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.pNext = divisors.empty() ? nullptr : &divisor_state,
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
.pVertexBindingDescriptions = vertex_bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
@ -100,28 +109,63 @@ GraphicsPipeline::GraphicsPipeline(
.patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
.lineWidth = 1.0f,
vk::StructureChain raster_chain = {
vk::PipelineRasterizationStateCreateInfo{
.depthClampEnable = key.depth_clamp_enable ||
(!key.depth_clip_enable && !instance.IsDepthClipEnableSupported()),
.rasterizerDiscardEnable = false,
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
.lineWidth = 1.0f,
},
vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{
.provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First
? vk::ProvokingVertexModeEXT::eFirstVertex
: vk::ProvokingVertexModeEXT::eLastVertex,
},
vk::PipelineRasterizationDepthClipStateCreateInfoEXT{
.depthClipEnable = key.depth_clip_enable,
},
};
if (!instance.IsProvokingVertexSupported()) {
raster_chain.unlink<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>();
}
if (!instance.IsDepthClipEnableSupported()) {
raster_chain.unlink<vk::PipelineRasterizationDepthClipStateCreateInfoEXT>();
}
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples =
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
.sampleShadingEnable = false,
};
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
.negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW,
const vk::DepthClampRangeEXT depth_clamp_range = {
.minDepthClamp = key.min_depth_clamp,
.maxDepthClamp = key.max_depth_clamp,
};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr,
vk::StructureChain viewport_chain = {
vk::PipelineViewportStateCreateInfo{},
vk::PipelineViewportDepthClipControlCreateInfoEXT{
.negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW,
},
vk::PipelineViewportDepthClampControlCreateInfoEXT{
.depthClampMode = key.depth_clamp_user_defined_range
? vk::DepthClampModeEXT::eUserDefinedRange
: vk::DepthClampModeEXT::eViewportRange,
.pDepthClampRange = &depth_clamp_range,
},
};
boost::container::static_vector<vk::DynamicState, 20> dynamic_states = {
if (!instance.IsDepthClampControlSupported()) {
viewport_chain.unlink<vk::PipelineViewportDepthClampControlCreateInfoEXT>();
}
if (!instance.IsDepthClipControlSupported()) {
viewport_chain.unlink<vk::PipelineViewportDepthClipControlCreateInfoEXT>();
}
boost::container::static_vector<vk::DynamicState, 32> dynamic_states = {
vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount,
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable,
vk::DynamicState::eDepthWriteEnable, vk::DynamicState::eDepthCompareOp,
@ -129,7 +173,8 @@ GraphicsPipeline::GraphicsPipeline(
vk::DynamicState::eStencilTestEnable, vk::DynamicState::eStencilReference,
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilOp, vk::DynamicState::eCullMode,
vk::DynamicState::eFrontFace,
vk::DynamicState::eFrontFace, vk::DynamicState::eRasterizerDiscardEnable,
vk::DynamicState::eLineWidth,
};
if (instance.IsPrimitiveRestartDisableSupported()) {
@ -212,11 +257,19 @@ GraphicsPipeline::GraphicsPipeline(
});
}
const auto depth_format =
instance.GetSupportedFormat(LiverpoolToVK::DepthFormat(key.z_format, key.stencil_format),
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
.colorAttachmentCount = key.num_color_attachments,
.pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = key.depth_format,
.stencilAttachmentFormat = key.stencil_format,
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid
? depth_format
: vk::Format::eUndefined,
.stencilAttachmentFormat =
key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid
? depth_format
: vk::Format::eUndefined,
};
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
@ -271,8 +324,9 @@ GraphicsPipeline::GraphicsPipeline(
}
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = false,
.logicOp = vk::LogicOp::eCopy,
.logicOpEnable =
instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy,
.logicOp = LiverpoolToVK::LogicOp(key.logic_op),
.attachmentCount = key.num_color_attachments,
.pAttachments = attachments.data(),
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
@ -285,8 +339,8 @@ GraphicsPipeline::GraphicsPipeline(
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
.pInputAssemblyState = &input_assembly,
.pTessellationState = &tessellation_state,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pViewportState = &viewport_chain.get(),
.pRasterizationState = &raster_chain.get(),
.pMultisampleState = &multisampling,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
@ -304,19 +358,17 @@ GraphicsPipeline::GraphicsPipeline(
GraphicsPipeline::~GraphicsPipeline() = default;
template <typename Attribute, typename Binding>
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
VertexInputs<Binding>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
void GraphicsPipeline::GetVertexInputs(
VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const {
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
if (!fetch_shader || fetch_shader->attributes.empty()) {
return;
}
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader.
continue;
}
const auto step_rate = attrib.GetStepRate();
const auto& buffer = attrib.GetSharp(vs_info);
attributes.push_back(Attribute{
.location = attrib.semantic,
@ -327,12 +379,19 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
bindings.push_back(Binding{
.binding = attrib.semantic,
.stride = buffer.GetStride(),
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
.inputRate = step_rate == InstanceIdType::None ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
});
const u32 divisor = step_rate == InstanceIdType::OverStepRate0
? step_rate_0
: (step_rate == InstanceIdType::OverStepRate1 ? step_rate_1 : 1);
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
bindings.back().divisor = 1;
bindings.back().divisor = divisor;
} else if (step_rate != InstanceIdType::None) {
divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{
.binding = attrib.semantic,
.divisor = divisor,
});
}
guest_buffers.emplace_back(buffer);
}
@ -342,11 +401,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
VertexInputs<vk::VertexInputBindingDescription>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
void GraphicsPipeline::BuildDescSetLayout() {
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;

View File

@ -33,22 +33,32 @@ using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points;
u32 num_color_attachments;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
vk::Format depth_format;
vk::Format stencil_format;
u32 num_samples;
u32 mrt_mask;
AmdGpu::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;
Liverpool::ClipSpace clip_space;
Liverpool::ColorBufferMask cb_shader_mask;
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points;
Liverpool::ColorBufferMask cb_shader_mask;
Liverpool::ColorControl::LogicOp logic_op;
u32 num_samples;
u32 mrt_mask;
struct {
Liverpool::DepthBuffer::ZFormat z_format : 2;
Liverpool::DepthBuffer::StencilFormat stencil_format : 1;
u32 depth_clamp_enable : 1;
u32 depth_clamp_user_defined_range : 1;
float min_depth_clamp;
float max_depth_clamp;
};
struct {
AmdGpu::PrimitiveType prim_type : 5;
Liverpool::PolygonMode polygon_mode : 2;
Liverpool::ClipSpace clip_space : 1;
Liverpool::ProvokingVtxLast provoking_vtx_last : 1;
u32 depth_clip_enable : 1;
};
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(key)) == 0;
@ -81,7 +91,9 @@ public:
/// Gets the attributes and bindings for vertex inputs.
template <typename Attribute, typename Binding>
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0,
u32 step_rate_1) const;
private:
void BuildDescSetLayout();

View File

@ -248,6 +248,7 @@ bool Instance::CreateDevice() {
// Required
ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
ASSERT(add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME));
// Optional
depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
@ -269,10 +270,13 @@ bool Instance::CreateDevice() {
}
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
depth_clamp_control = add_extension(VK_EXT_DEPTH_CLAMP_CONTROL_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
@ -361,9 +365,11 @@ bool Instance::CreateDevice() {
.dualSrcBlend = features.dualSrcBlend,
.logicOp = features.logicOp,
.multiDrawIndirect = features.multiDrawIndirect,
.depthClamp = features.depthClamp,
.depthBiasClamp = features.depthBiasClamp,
.fillModeNonSolid = features.fillModeNonSolid,
.depthBounds = features.depthBounds,
.wideLines = features.wideLines,
.multiViewport = features.multiViewport,
.samplerAnisotropy = features.samplerAnisotropy,
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
@ -417,6 +423,12 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true,
},
vk::PhysicalDeviceDepthClipEnableFeaturesEXT{
.depthClipEnable = true,
},
vk::PhysicalDeviceDepthClampControlFeaturesEXT{
.depthClampControl = true,
},
vk::PhysicalDeviceRobustness2FeaturesEXT{
.robustBufferAccess2 = robustness2_features.robustBufferAccess2,
.robustImageAccess2 = robustness2_features.robustImageAccess2,
@ -436,6 +448,12 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
vk::PhysicalDeviceProvokingVertexFeaturesEXT{
.provokingVertexLast = true,
},
vk::PhysicalDeviceVertexAttributeDivisorFeatures{
.vertexAttributeInstanceRateDivisor = true,
},
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
.shaderBufferFloat32AtomicMinMax =
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
@ -483,6 +501,12 @@ bool Instance::CreateDevice() {
if (!depth_clip_control) {
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
}
if (!depth_clip_enable) {
device_chain.unlink<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>();
}
if (!depth_clamp_control) {
device_chain.unlink<vk::PhysicalDeviceDepthClampControlFeaturesEXT>();
}
if (!robustness2) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
@ -498,6 +522,9 @@ bool Instance::CreateDevice() {
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
if (!provoking_vertex) {
device_chain.unlink<vk::PhysicalDeviceProvokingVertexFeaturesEXT>();
}
if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
}

View File

@ -109,6 +109,16 @@ public:
return depth_clip_control;
}
/// Returns true when VK_EXT_depth_clip_enable is supported.
/// Reports the `depth_clip_enable` flag, which CreateDevice() assigns from the result of
/// requesting that extension.
bool IsDepthClipEnableSupported() const {
return depth_clip_enable;
}
/// Returns true when VK_EXT_depth_clamp_control is supported.
/// Reports the `depth_clamp_control` flag, which CreateDevice() assigns from the result of
/// requesting that extension.
bool IsDepthClampControlSupported() const {
return depth_clamp_control;
}
/// Returns true when VK_EXT_depth_range_unrestricted is supported
bool IsDepthRangeUnrestrictedSupported() const {
return depth_range_unrestricted;
@ -150,6 +160,11 @@ public:
return legacy_vertex_attributes;
}
/// Returns true when VK_EXT_provoking_vertex is supported.
/// Reports the `provoking_vertex` flag, which CreateDevice() assigns from the result of
/// requesting that extension.
bool IsProvokingVertexSupported() const {
return provoking_vertex;
}
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
bool IsImageLoadStoreLodSupported() const {
return image_load_store_lod;
@ -324,11 +339,21 @@ public:
return properties.limits.maxViewportDimensions[0];
}
/// Returns the maximum viewport height.
/// Returns the maximum viewport height.
u32 GetMaxViewportHeight() const {
return properties.limits.maxViewportDimensions[1];
}
/// Returns the maximum render area width.
/// The value is the `maxFramebufferWidth` limit stored in `properties`.
u32 GetMaxFramebufferWidth() const {
return properties.limits.maxFramebufferWidth;
}
/// Returns the maximum render area height.
/// The value is the `maxFramebufferHeight` limit stored in `properties`.
u32 GetMaxFramebufferHeight() const {
return properties.limits.maxFramebufferHeight;
}
/// Returns the sample count flags supported by framebuffers.
vk::SampleCountFlags GetFramebufferSampleCounts() const {
return properties.limits.framebufferColorSampleCounts &
@ -341,6 +366,11 @@ public:
return driver_id != vk::DriverId::eMoltenvk;
}
/// Returns true if logic ops are supported by the device.
/// Reads the `logicOp` member of the stored `features` structure.
bool IsLogicOpSupported() const {
return features.logicOp;
}
/// Determines if a format is supported for a set of feature flags.
[[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const;
@ -389,12 +419,15 @@ private:
bool custom_border_color{};
bool fragment_shader_barycentric{};
bool depth_clip_control{};
bool depth_clip_enable{};
bool depth_clamp_control{};
bool depth_range_unrestricted{};
bool dynamic_state_3{};
bool vertex_input_dynamic_state{};
bool robustness2{};
bool list_restart{};
bool legacy_vertex_attributes{};
bool provoking_vertex{};
bool shader_stencil_export{};
bool image_load_store_lod{};
bool amd_gcn_shader{};

View File

@ -94,15 +94,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
switch (stage) {
case Stage::Local: {
BuildCommon(regs.ls_program);
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
info.ls_info.links_with_tcs = true;
Shader::TessellationDataConstantBuffer tess_constants;
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
const auto params = Liverpool::GetParams(*pgm);
const auto& hull_info = program_cache.at(params.hash)->info;
hull_info.ReadTessConstantBuffer(tess_constants);
info.ls_info.ls_stride = tess_constants.ls_stride;
}
Shader::TessellationDataConstantBuffer tess_constants;
const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)];
hull_info->ReadTessConstantBuffer(tess_constants);
info.ls_info.ls_stride = tess_constants.ls_stride;
break;
}
case Stage::Hull: {
@ -122,6 +117,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
case Stage::Vertex: {
BuildCommon(regs.vs_program);
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
@ -288,26 +285,21 @@ bool PipelineCache::RefreshGraphicsKey() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
const auto depth_format = instance.GetSupportedFormat(
LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format,
regs.depth_buffer.stencil_info.format),
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
if (regs.depth_buffer.DepthValid()) {
key.depth_format = depth_format;
} else {
key.depth_format = vk::Format::eUndefined;
}
if (regs.depth_buffer.StencilValid()) {
key.stencil_format = depth_format;
} else {
key.stencil_format = vk::Format::eUndefined;
}
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()
: Liverpool::DepthBuffer::ZFormat::Invalid;
key.stencil_format = regs.depth_buffer.StencilValid()
? regs.depth_buffer.stencil_info.format.Value()
: Liverpool::DepthBuffer::StencilFormat::Invalid;
key.depth_clip_enable = regs.clipper_control.ZclipEnable();
key.clip_space = regs.clipper_control.clip_space;
key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
key.clip_space = regs.clipper_control.clip_space;
key.logic_op = regs.color_control.rop3;
key.num_samples = regs.NumSamples();
RefreshDepthClampRange();
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
@ -460,10 +452,6 @@ bool PipelineCache::RefreshGraphicsKey() {
// Stride will still be handled outside the pipeline using dynamic state.
u32 vertex_binding = 0;
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader.
continue;
}
const auto& buffer = attrib.GetSharp(*vs_info);
ASSERT(vertex_binding < MaxVertexBufferCount);
key.vertex_buffer_formats[vertex_binding++] =
@ -498,7 +486,63 @@ bool PipelineCache::RefreshGraphicsKey() {
}
return true;
} // namespace Vulkan
}
/// Fills in the depth-clamp fields of the graphics pipeline key from the current registers.
///
/// Sets `depth_clamp_enable` from the inverse of `depth_render_override.disable_viewport_clamp`.
/// When clamping is enabled and a depth buffer is present, every viewport with a non-zero
/// `xscale` is inspected to decide whether the clamp range equals the viewport depth range
/// (viewport-range mode) or must be supplied explicitly (user-defined-range mode, using the
/// zmin/zmax of the first active viewport). An error is logged when neither representation
/// matches all active viewports.
void PipelineCache::RefreshDepthClampRange() {
    auto& regs = liverpool->regs;
    auto& key = graphics_key;

    // GraphicsPipelineKey::operator== compares raw bytes, so every clamp field must hold a
    // deterministic value on each exit path. Resetting them here guarantees that values written
    // for an earlier draw cannot survive in the key (in case the key is not cleared between
    // refreshes) and make otherwise identical states compare as different.
    key.depth_clamp_user_defined_range = false;
    key.min_depth_clamp = 0.f;
    key.max_depth_clamp = 0.f;

    key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
    if (key.z_format == Liverpool::DepthBuffer::ZFormat::Invalid || !key.depth_clamp_enable) {
        return;
    }

    bool depth_clamp_can_use_viewport_range = true;
    bool depth_clamp_is_same_on_all_viewports = true;
    // Sentinel marking "no active viewport has been seen yet".
    constexpr float unset_depth = std::numeric_limits<float>::max();
    float zmin = unset_depth;
    float zmax = unset_depth;
    const auto& vp_ctl = regs.viewport_control;
    for (u32 i = 0; i < Liverpool::NumViewports; i++) {
        const auto& vp = regs.viewports[i];
        const auto& vp_d = regs.viewport_depths[i];
        if (vp.xscale == 0) {
            // Viewports with a zero x scale are skipped entirely.
            continue;
        }
        const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
        const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;

        // Depth range implied by the viewport transform for the active clip space.
        float min_depth;
        float max_depth;
        if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) {
            min_depth = zoffset - zscale;
            max_depth = zoffset + zscale;
        } else {
            min_depth = zoffset;
            max_depth = zoffset + zscale;
        }
        if (zmin == unset_depth) {
            // The first active viewport provides the reference clamp range.
            zmin = vp_d.zmin;
            zmax = vp_d.zmax;
        }
        depth_clamp_is_same_on_all_viewports &= (zmin == vp_d.zmin && zmax == vp_d.zmax);
        depth_clamp_can_use_viewport_range &= (min_depth == vp_d.zmin && max_depth == vp_d.zmax);
    }

    if (zmin == unset_depth) {
        // No active viewport: keep the reset (viewport-range) defaults.
        return;
    }

    if (!depth_clamp_can_use_viewport_range && !depth_clamp_is_same_on_all_viewports) {
        LOG_ERROR(Render_Vulkan,
                  "Viewport depth clamping configuration cannot be accurately emulated");
    }

    key.depth_clamp_user_defined_range = !depth_clamp_can_use_viewport_range;
    if (key.depth_clamp_user_defined_range) {
        key.min_depth_clamp = zmin;
        key.max_depth_clamp = zmax;
    }
}
bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{};

View File

@ -76,6 +76,8 @@ private:
bool RefreshGraphicsKey();
bool RefreshComputeKey();
void RefreshDepthClampRange();
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext);
std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,

View File

@ -20,12 +20,9 @@
namespace Vulkan {
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
Shader::PushData push_data{};
push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step1 = regs.vgt_instance_step_rate_1;
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
// is encountered and implemented in the recompiler.
Shader::PushData push_data{};
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
@ -113,6 +110,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
// Prefetch color and depth buffers to let texture cache handle possible overlaps with bound
// textures (e.g. mipgen)
RenderState state;
state.width = instance.GetMaxFramebufferWidth();
state.height = instance.GetMaxFramebufferHeight();
cb_descs.clear();
db_desc.reset();
@ -1015,9 +1014,10 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {
UpdateViewportScissorState();
UpdateDepthStencilState();
UpdatePrimitiveState();
UpdateRasterizationState();
auto& dynamic_state = scheduler.GetDynamicState();
dynamic_state.SetBlendConstants(&liverpool->regs.blend_constants.red);
dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
// Commit new dynamic state to the command buffer.
@ -1087,12 +1087,6 @@ void Rasterizer::UpdateViewportScissorState() const {
viewport.maxDepth = zoffset + zscale;
}
if (!regs.depth_render_override.disable_viewport_clamp) {
// Apply depth clamp.
viewport.minDepth = std::max(viewport.minDepth, vp_d.zmin);
viewport.maxDepth = std::min(viewport.maxDepth, vp_d.zmax);
}
if (!instance.IsDepthRangeUnrestrictedSupported()) {
// Unrestricted depth range not supported by device. Restrict to valid range.
viewport.minDepth = std::max(viewport.minDepth, 0.f);
@ -1232,10 +1226,17 @@ void Rasterizer::UpdatePrimitiveState() const {
const auto front_face = LiverpoolToVK::FrontFace(regs.polygon_control.front_face);
dynamic_state.SetPrimitiveRestartEnabled(prim_restart);
dynamic_state.SetRasterizerDiscardEnabled(regs.clipper_control.dx_rasterization_kill);
dynamic_state.SetCullMode(cull_mode);
dynamic_state.SetFrontFace(front_face);
}
/// Pushes the line width taken from the `line_control` register into the scheduler's
/// dynamic state.
void Rasterizer::UpdateRasterizationState() const {
    const float line_width = liverpool->regs.line_control.Width();
    scheduler.GetDynamicState().SetLineWidth(line_width);
}
void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) {
if ((from_guest && !Config::getVkGuestMarkersEnabled()) ||
(!from_guest && !Config::getVkHostMarkersEnabled())) {

View File

@ -94,6 +94,7 @@ private:
void UpdateViewportScissorState() const;
void UpdateDepthStencilState() const;
void UpdatePrimitiveState() const;
void UpdateRasterizationState() const;
bool FilterDraw();

View File

@ -34,16 +34,11 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
is_rendering = true;
render_state = new_state;
const auto width =
render_state.width != std::numeric_limits<u32>::max() ? render_state.width : 1;
const auto height =
render_state.height != std::numeric_limits<u32>::max() ? render_state.height : 1;
const vk::RenderingInfo rendering_info = {
.renderArea =
{
.offset = {0, 0},
.extent = {width, height},
.extent = {render_state.width, render_state.height},
},
.layerCount = 1,
.colorAttachmentCount = render_state.num_color_attachments,
@ -313,6 +308,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable);
}
}
if (dirty_state.rasterizer_discard_enable) {
dirty_state.rasterizer_discard_enable = false;
cmdbuf.setRasterizerDiscardEnable(rasterizer_discard_enable);
}
if (dirty_state.cull_mode) {
dirty_state.cull_mode = false;
cmdbuf.setCullMode(cull_mode);
@ -323,7 +322,7 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
}
if (dirty_state.blend_constants) {
dirty_state.blend_constants = false;
cmdbuf.setBlendConstants(blend_constants);
cmdbuf.setBlendConstants(blend_constants.data());
}
if (dirty_state.color_write_masks) {
dirty_state.color_write_masks = false;
@ -331,6 +330,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
cmdbuf.setColorWriteMaskEXT(0, color_write_masks);
}
}
if (dirty_state.line_width) {
dirty_state.line_width = false;
cmdbuf.setLineWidth(line_width);
}
}
} // namespace Vulkan

View File

@ -26,8 +26,8 @@ struct RenderState {
u32 num_color_attachments{};
bool has_depth{};
bool has_stencil{};
u32 width = std::numeric_limits<u32>::max();
u32 height = std::numeric_limits<u32>::max();
u32 width{};
u32 height{};
bool operator==(const RenderState& other) const noexcept {
return std::memcmp(this, &other, sizeof(RenderState)) == 0;
@ -96,11 +96,13 @@ struct DynamicState {
bool stencil_back_compare_mask : 1;
bool primitive_restart_enable : 1;
bool rasterizer_discard_enable : 1;
bool cull_mode : 1;
bool front_face : 1;
bool blend_constants : 1;
bool color_write_masks : 1;
bool line_width : 1;
} dirty_state{};
Viewports viewports{};
@ -130,11 +132,13 @@ struct DynamicState {
u32 stencil_back_compare_mask{};
bool primitive_restart_enable{};
bool rasterizer_discard_enable{};
vk::CullModeFlags cull_mode{};
vk::FrontFace front_face{};
float blend_constants[4]{};
std::array<float, 4> blend_constants{};
ColorWriteMasks color_write_masks{};
float line_width{};
/// Commits the dynamic state to the provided command buffer.
void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf);
@ -283,19 +287,33 @@ struct DynamicState {
}
}
void SetBlendConstants(const float blend_constants_[4]) {
if (!std::equal(blend_constants, std::end(blend_constants), blend_constants_)) {
std::memcpy(blend_constants, blend_constants_, sizeof(blend_constants));
void SetBlendConstants(const std::array<float, 4> blend_constants_) {
if (blend_constants != blend_constants_) {
blend_constants = blend_constants_;
dirty_state.blend_constants = true;
}
}
/// Stores the rasterizer-discard toggle and raises its dirty bit, but only
/// when the requested value differs from the one currently tracked.
void SetRasterizerDiscardEnabled(const bool enabled) {
    if (rasterizer_discard_enable == enabled) {
        return; // Value unchanged: leave the dirty bit as it is.
    }
    rasterizer_discard_enable = enabled;
    dirty_state.rasterizer_discard_enable = true;
}
/// Replaces the tracked color write masks and raises the matching dirty bit
/// when the incoming masks differ element-wise from the stored ones.
void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) {
    const bool unchanged = std::ranges::equal(color_write_masks, color_write_masks_);
    if (unchanged) {
        return; // Same masks: leave the dirty bit as it is.
    }
    color_write_masks = color_write_masks_;
    dirty_state.color_write_masks = true;
}
/// Stores the requested line width and raises its dirty bit when the value
/// does not compare equal to the one currently tracked.
void SetLineWidth(const float width) {
    if (line_width == width) {
        return; // Same width: leave the dirty bit as it is.
    }
    line_width = width;
    dirty_state.line_width = true;
}
};
class Scheduler {