shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates
commit c0878db548
parent b403e1be33
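Before this change, vertex attributes using the GCN instance step rates (OverStepRate0/OverStepRate1) were skipped at pipeline creation: their vertex buffers were bound as regular data buffers, the step rates travelled in push constants (sr0/sr1), and the vertex shader pulled the data itself by computing instance_id / step_rate. This commit binds such attributes as ordinary instanced vertex inputs and lets the fixed-function vertex input stage do the division via VK_EXT_vertex_attribute_divisor.

A minimal sketch of the mapping (illustrative only, not code from this patch; `semantic`, `stride`, and `step_rate` are placeholder parameters, and the designated initializers assume VULKAN_HPP_NO_CONSTRUCTORS, as used in this codebase):

    #include <cstdint>
    #include <utility>
    #include <vulkan/vulkan.hpp>

    // With a divisor of N, an instance-rate binding advances to the next
    // element once every N instances - the same indexing the shader used to
    // compute manually as instance_id / step_rate.
    std::pair<vk::VertexInputBindingDescription, vk::VertexInputBindingDivisorDescriptionEXT>
    MakeInstanceStepRateBinding(uint32_t semantic, uint32_t stride, uint32_t step_rate) {
        return {
            vk::VertexInputBindingDescription{
                .binding = semantic,
                .stride = stride,
                .inputRate = vk::VertexInputRate::eInstance,
            },
            vk::VertexInputBindingDivisorDescriptionEXT{
                .binding = semantic,
                .divisor = step_rate, // vgt_instance_step_rate_0/_1, or 1 for plain per-instance data
            },
        };
    }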
@@ -120,6 +120,9 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
 }
 } // Anonymous namespace
 
+using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
+
 Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
     const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
     const u32 half = PushData::UdRegsIndex + (index >> 2);
@@ -131,41 +134,6 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
     return ud_reg;
 }
 
-void EmitGetThreadBitScalarReg(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitSetThreadBitScalarReg(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitGetScalarRegister(EmitContext&) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitSetScalarRegister(EmitContext&) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitGetVectorRegister(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitSetVectorRegister(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitSetGotoVariable(EmitContext&) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-void EmitGetGotoVariable(EmitContext&) {
-    UNREACHABLE_MSG("Unreachable instruction");
-}
-
-using PointerType = EmitContext::PointerType;
-using PointerSize = EmitContext::PointerSize;
-
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
     if (!Config::directMemoryAccess()) {
@@ -201,18 +169,12 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     return ReadConstBuffer<PointerType::U32>(ctx, handle, index);
 }
 
-Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
-    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
-    return ctx.OpLoad(
-        ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                                      ctx.push_data_block, ctx.ConstU32(index)));
-}
-
-static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
     if (IR::IsPosition(attr)) {
         ASSERT(attr == IR::Attribute::Position0);
         const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
+        const auto pointer{
+            ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
         const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
         return ctx.OpLoad(ctx.F32[1],
                           ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@@ -222,7 +184,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32
         const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
         const auto param = ctx.input_params.at(param_id).id;
         const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
+        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
         const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
         return ctx.OpLoad(ctx.F32[1],
                           ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@@ -230,7 +192,7 @@ static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32
     UNREACHABLE();
 }
 
-Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
     if (ctx.info.l_stage == LogicalStage::Geometry) {
         return EmitGetAttributeForGeometry(ctx, attr, comp, index);
     } else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
@@ -248,18 +210,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
     if (IR::IsParam(attr)) {
         const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)};
         const auto& param{ctx.input_params.at(param_index)};
-        if (param.buffer_handle >= 0) {
-            const auto step_rate = EmitReadStepRate(ctx, param.id.value);
-            const auto offset = ctx.OpIAdd(
-                ctx.U32[1],
-                ctx.OpIMul(
-                    ctx.U32[1],
-                    ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
-                    ctx.ConstU32(param.num_components)),
-                ctx.ConstU32(comp));
-            return ReadConstBuffer<PointerType::F32>(ctx, param.buffer_handle, offset);
-        }
-
         Id result;
         if (param.is_loaded) {
             // Attribute is either default or manually interpolated. The id points to an already
@@ -305,10 +255,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
         return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
     case IR::Attribute::InstanceId:
         return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
-    case IR::Attribute::InstanceId0:
-        return EmitReadStepRate(ctx, 0);
-    case IR::Attribute::InstanceId1:
-        return EmitReadStepRate(ctx, 1);
     case IR::Attribute::WorkgroupIndex:
         return ctx.workgroup_index_id;
     case IR::Attribute::WorkgroupId:
@@ -640,4 +586,36 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
     UNREACHABLE_MSG("SPIR-V instruction");
 }
 
+void EmitGetThreadBitScalarReg(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetThreadBitScalarReg(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetScalarRegister(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetScalarRegister(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetVectorRegister(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetVectorRegister(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetGotoVariable(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetGotoVariable(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 } // namespace Shader::Backend::SPIRV
@@ -108,7 +108,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
 Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                              Id cmp_value);
-Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
 Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
 Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
@@ -377,35 +377,13 @@ void EmitContext::DefineInputs() {
             ASSERT(attrib.semantic < IR::NumParams);
             const auto sharp = attrib.GetSharp(info);
             const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
-            if (attrib.UsesStepRates()) {
-                const u32 rate_idx =
-                    attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
-                                                                                                : 1;
-                const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
-                const auto buffer =
-                    std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
-                        return buffer.instance_attrib == attrib.semantic;
-                    });
-                // Note that we pass index rather than Id
-                input_params[attrib.semantic] = SpirvAttribute{
-                    .id = {rate_idx},
-                    .pointer_type = input_u32,
-                    .component_type = U32[1],
-                    .num_components = std::min<u16>(attrib.num_elements, num_components),
-                    .is_integer = true,
-                    .is_loaded = false,
-                    .buffer_handle = int(buffer - info.buffers.begin()),
-                };
+            Id id{DefineInput(type, attrib.semantic)};
+            if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
+                Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
             } else {
-                Id id{DefineInput(type, attrib.semantic)};
-                if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
-                    Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
-                } else {
-                    Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
-                }
-                input_params[attrib.semantic] =
-                    GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
+                Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
             }
+            input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
         }
         break;
     }
@@ -700,12 +678,10 @@ void EmitContext::DefineOutputs() {
 
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
-    const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
-                                         U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
+    const Id struct_type{Name(TypeStruct(F32[1], F32[1], F32[1], F32[1], U32[4], U32[4], U32[4],
+                                         U32[4], U32[4], U32[4], U32[2]),
                               "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
-    MemberName(struct_type, PushData::Step0Index, "sr0");
-    MemberName(struct_type, PushData::Step1Index, "sr1");
     MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
     MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
     MemberName(struct_type, PushData::XScaleIndex, "xscale");
@@ -717,19 +693,17 @@ void EmitContext::DefinePushDataBlock() {
     MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
     MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
     MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
-    MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
-    MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
-    MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
-    MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
-    MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
-    MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
-    MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
-    MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
-    MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
-    MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
-    MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
-    MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
-    MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
+    MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 0U);
+    MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 4U);
+    MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 8U);
+    MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 12U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 16U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 32U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 48U);
+    MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 64U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 80U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 96U);
+    MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 112U);
     push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
     Name(push_data_block, "push_data");
     interfaces.push_back(push_data_block);
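With sr0/sr1 gone from the block, every remaining member moves down by 8 bytes, which is why all of the Offset decorations above change in lockstep. Reconstructing the resulting layout from the TypeStruct and MemberDecorate calls (a sketch for orientation only; field names follow the MemberName calls, array shapes are inferred, and the host-side mirror is Shader::PushData, which loses its step0/step1 fields in the same commit):

    // Byte offsets of the new AuxData push-constant block:
    struct AuxData {
        float xoffset;       //   0
        float yoffset;       //   4
        float xscale;        //   8
        float yscale;        //  12
        u32 ud_regs[4][4];   //  16, 32, 48, 64 (one uvec4 per member)
        u32 buf_offsets0[4]; //  80
        u32 buf_offsets1[4]; //  96
        u32 buf_offsets2[2]; // 112 (the trailing uvec2 in the TypeStruct)
    };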
@@ -361,7 +361,6 @@ public:
         u32 num_components;
         bool is_integer{};
         bool is_loaded{};
-        s32 buffer_handle{-1};
     };
     Id input_attr_array;
     Id output_attr_array;
@@ -3,7 +3,6 @@
 
 #pragma once
 
-#include <ranges>
 #include <vector>
 #include "common/types.h"
 #include "shader_recompiler/info.h"
@@ -29,11 +28,6 @@ struct VertexAttribute {
         return static_cast<InstanceIdType>(instance_data);
     }
 
-    [[nodiscard]] bool UsesStepRates() const {
-        const auto step_rate = GetStepRate();
-        return step_rate == OverStepRate0 || step_rate == OverStepRate1;
-    }
-
    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
         return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
     }
@@ -52,12 +46,6 @@ struct FetchShaderData {
     s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
     s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
 
-    [[nodiscard]] bool UsesStepRates() const {
-        return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
-                   return attribute.UsesStepRates();
-               }) != attributes.end();
-    }
-
     bool operator==(const FetchShaderData& other) const {
         return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
                instance_offset_sgpr == other.instance_offset_sgpr;
@@ -481,11 +481,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
 }
 
 void Translator::EmitFetch(const GcnInst& inst) {
-    // Read the pointer to the fetch shader assembly.
     const auto code_sgpr_base = inst.src[0].code;
 
+    // The fetch shader must be inlined to access as regular buffers, so that
+    // bounds checks can be emitted to emulate robust buffer access.
     if (!profile.supports_robust_buffer_access) {
-        // The fetch shader must be inlined to access as regular buffers, so that
-        // bounds checks can be emitted to emulate robust buffer access.
         const auto* code = GetFetchShaderCode(info, code_sgpr_base);
         GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
         GcnDecodeContext decoder;
@@ -535,16 +535,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
         for (u32 i = 0; i < 4; i++) {
             ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
         }
-
-        // In case of programmable step rates we need to fallback to instance data pulling in
-        // shader, so VBs should be bound as regular data buffers
-        if (attrib.UsesStepRates()) {
-            info.buffers.push_back({
-                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
-                .used_types = IR::Type::F32,
-                .instance_attrib = attrib.semantic,
-            });
-        }
     }
 }
 
@@ -113,17 +113,13 @@ struct FMaskResource {
 using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
 
 struct PushData {
-    static constexpr u32 Step0Index = 0;
-    static constexpr u32 Step1Index = 1;
-    static constexpr u32 XOffsetIndex = 2;
-    static constexpr u32 YOffsetIndex = 3;
-    static constexpr u32 XScaleIndex = 4;
-    static constexpr u32 YScaleIndex = 5;
-    static constexpr u32 UdRegsIndex = 6;
+    static constexpr u32 XOffsetIndex = 0;
+    static constexpr u32 YOffsetIndex = 1;
+    static constexpr u32 XScaleIndex = 2;
+    static constexpr u32 YScaleIndex = 3;
+    static constexpr u32 UdRegsIndex = 4;
     static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
 
-    u32 step0;
-    u32 step1;
     float xoffset;
     float yoffset;
     float xscale;
@@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) {
     Inst(Opcode::SetM0, value);
 }
 
-F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
-    return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
+F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
+    return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
 }
 
 U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
@@ -81,8 +81,7 @@ public:
 
     [[nodiscard]] U1 Condition(IR::Condition cond);
 
-    [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
-                                   IR::Value index = IR::Value(u32(0u)));
+    [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
     [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
     void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
 
@@ -116,7 +116,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
         }
 
         const auto shl_inst = inst.Arg(1).TryInstRecursive();
-        const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
+        const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
         const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
         const auto bucket = offset.Resolve().U32() / 256u;
         const auto attrib = bucket < 4 ? IR::Attribute::Position0
@@ -85,6 +85,8 @@ struct VertexRuntimeInfo {
     std::array<VsOutputMap, 3> outputs;
     bool emulate_depth_negative_one_to_one{};
     bool clip_disable{};
+    u32 step_rate_0;
+    u32 step_rate_1;
     // Domain
     AmdGpu::TessellationType tess_type;
     AmdGpu::TessellationTopology tess_topology;
@@ -14,6 +14,7 @@ namespace Shader {
 
 struct VsAttribSpecialization {
     s32 num_components{};
+    u32 divisor{};
     AmdGpu::NumberClass num_class{};
     AmdGpu::CompMapping dst_select{};
 
@@ -74,13 +75,13 @@ struct SamplerSpecialization {
  * after the first compilation of a module.
  */
 struct StageSpecialization {
-    static constexpr size_t MaxStageResources = 64;
+    static constexpr size_t MaxStageResources = 128;
 
     const Shader::Info* info;
     RuntimeInfo runtime_info;
+    std::bitset<MaxStageResources> bitset{};
     std::optional<Gcn::FetchShaderData> fetch_shader_data{};
     boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
-    std::bitset<MaxStageResources> bitset{};
     boost::container::small_vector<BufferSpecialization, 16> buffers;
     boost::container::small_vector<ImageSpecialization, 16> images;
     boost::container::small_vector<FMaskSpecialization, 8> fmasks;
@@ -94,10 +95,17 @@ struct StageSpecialization {
         if (info_.stage == Stage::Vertex && fetch_shader_data) {
             // Specialize shader on VS input number types to follow spec.
             ForEachSharp(vs_attribs, fetch_shader_data->attributes,
-                         [&profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
-                             spec.num_components = desc.UsesStepRates()
-                                                       ? AmdGpu::NumComponents(sharp.GetDataFmt())
-                                                       : 0;
+                         [&profile_, this](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                             spec.num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
+                             using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
+                             if (const auto step_rate = desc.GetStepRate();
+                                 step_rate != InstanceIdType::None) {
+                                 spec.divisor = step_rate == InstanceIdType::OverStepRate0
+                                                    ? runtime_info.vs_info.step_rate_0
+                                                    : (step_rate == InstanceIdType::OverStepRate1
+                                                           ? runtime_info.vs_info.step_rate_1
+                                                           : 1);
+                             }
                              spec.num_class = profile_.support_legacy_vertex_attributes
                                                   ? AmdGpu::NumberClass{}
                                                   : AmdGpu::GetNumberClass(sharp.GetNumberFmt());
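The divisor now has to participate in shader specialization: the step rate is baked into the pipeline's vertex input state rather than read from push constants at run time, so draws whose vgt_instance_step_rate registers differ can no longer share a specialized pipeline. Inside the step_rate != InstanceIdType::None branch, the nested conditional above is equivalent to this plainer sketch:

    u32 divisor = 1; // Plain per-instance attribute
    if (step_rate == InstanceIdType::OverStepRate0) {
        divisor = runtime_info.vs_info.step_rate_0;
    } else if (step_rate == InstanceIdType::OverStepRate1) {
        divisor = runtime_info.vs_info.step_rate_1;
    }
    spec.divisor = divisor;

MaxStageResources also doubles to 128 so the specialization bitset stays large enough for the extra per-attribute state.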
@@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
 }
 
 void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
+    const auto& regs = liverpool->regs;
     Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
     Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
+    Vulkan::VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
     Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
-    pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
+    pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers,
+                             regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1);
 
     if (instance.IsVertexInputDynamicState()) {
         // Update current vertex inputs.
@@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline(
 
     VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
     VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
+    VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
     VertexInputs<AmdGpu::Buffer> guest_buffers;
     if (!instance.IsVertexInputDynamicState()) {
-        GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
+        const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
+        GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
+                        vs_info.step_rate_0, vs_info.step_rate_1);
     }
 
+    const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
+        .vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
+        .pVertexBindingDivisors = divisors.data(),
+    };
+
     const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
+        .pNext = &divisor_state,
         .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
         .pVertexBindingDescriptions = vertex_bindings.data(),
         .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
@@ -304,19 +313,16 @@ GraphicsPipeline::GraphicsPipeline(
 GraphicsPipeline::~GraphicsPipeline() = default;
 
 template <typename Attribute, typename Binding>
-void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
-                                       VertexInputs<Binding>& bindings,
-                                       VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
+void GraphicsPipeline::GetVertexInputs(
+    VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
+    VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
+    VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const {
+    using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
     if (!fetch_shader || fetch_shader->attributes.empty()) {
         return;
     }
     const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
     for (const auto& attrib : fetch_shader->attributes) {
-        if (attrib.UsesStepRates()) {
-            // Skip attribute binding as the data will be pulled by shader.
-            continue;
-        }
-
         const auto& buffer = attrib.GetSharp(vs_info);
         attributes.push_back(Attribute{
             .location = attrib.semantic,
@@ -327,12 +333,21 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
         bindings.push_back(Binding{
             .binding = attrib.semantic,
             .stride = buffer.GetStride(),
-            .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
+            .inputRate = attrib.GetStepRate() == InstanceIdType::None
                              ? vk::VertexInputRate::eVertex
                              : vk::VertexInputRate::eInstance,
         });
+        const u32 divisor =
+            attrib.GetStepRate() == InstanceIdType::OverStepRate0
+                ? step_rate_0
+                : (attrib.GetStepRate() == InstanceIdType::OverStepRate1 ? step_rate_1 : 1);
         if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
-            bindings.back().divisor = 1;
+            bindings.back().divisor = divisor;
+        } else {
+            divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{
+                .binding = attrib.semantic,
+                .divisor = divisor,
+            });
         }
         guest_buffers.emplace_back(buffer);
     }
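Note the two consumption paths for the computed divisor: with VK_EXT_vertex_input_dynamic_state, Binding is vk::VertexInputBindingDescription2EXT and carries its own divisor member, so the value is set directly on the binding; for monolithic pipelines it is collected into the divisors list, which the pipeline constructor chains into vk::PipelineVertexInputStateCreateInfo through vk::PipelineVertexInputDivisorStateCreateInfo (see the constructor hunk above). OverStepRate0/1 attributes take their step rate from the corresponding VGT register, and any other instanced attribute keeps a divisor of 1, i.e. one element per instance.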
@@ -342,11 +357,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
 template void GraphicsPipeline::GetVertexInputs(
     VertexInputs<vk::VertexInputAttributeDescription>& attributes,
     VertexInputs<vk::VertexInputBindingDescription>& bindings,
-    VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+    VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
+    VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
 template void GraphicsPipeline::GetVertexInputs(
     VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
     VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
-    VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+    VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
+    VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
 
 void GraphicsPipeline::BuildDescSetLayout() {
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
@@ -81,7 +81,9 @@ public:
     /// Gets the attributes and bindings for vertex inputs.
     template <typename Attribute, typename Binding>
     void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
-                         VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+                         VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
+                         VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0,
+                         u32 step_rate_1) const;
 
 private:
     void BuildDescSetLayout();
@@ -277,6 +277,7 @@ bool Instance::CreateDevice() {
     image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
     amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
     amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
+    vertex_attribute_divisor = add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
     shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
     if (shader_atomic_float2) {
         shader_atomic_float2_features =
@@ -436,6 +437,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
             .legacyVertexAttributes = true,
         },
+        vk::PhysicalDeviceVertexAttributeDivisorFeatures{
+            .vertexAttributeInstanceRateDivisor = true,
+        },
         vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
             .shaderBufferFloat32AtomicMinMax =
                 shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
@@ -498,6 +502,9 @@ bool Instance::CreateDevice() {
     if (!legacy_vertex_attributes) {
         device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
     }
+    if (!vertex_attribute_divisor) {
+        device_chain.unlink<vk::PhysicalDeviceVertexAttributeDivisorFeatures>();
+    }
     if (!shader_atomic_float2) {
         device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
     }
|
@ -150,6 +150,11 @@ public:
|
|||||||
return legacy_vertex_attributes;
|
return legacy_vertex_attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_EXT_vertex_attribute_divisor is supported.
|
||||||
|
bool IsVertexAttributeDivisorSupported() const {
|
||||||
|
return vertex_attribute_divisor;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
||||||
bool IsImageLoadStoreLodSupported() const {
|
bool IsImageLoadStoreLodSupported() const {
|
||||||
return image_load_store_lod;
|
return image_load_store_lod;
|
||||||
@ -398,6 +403,7 @@ private:
|
|||||||
u32 queue_family_index{0};
|
u32 queue_family_index{0};
|
||||||
bool custom_border_color{};
|
bool custom_border_color{};
|
||||||
bool fragment_shader_barycentric{};
|
bool fragment_shader_barycentric{};
|
||||||
|
bool vertex_attribute_divisor{};
|
||||||
bool depth_clip_control{};
|
bool depth_clip_control{};
|
||||||
bool depth_range_unrestricted{};
|
bool depth_range_unrestricted{};
|
||||||
bool dynamic_state_3{};
|
bool dynamic_state_3{};
|
||||||
|
@ -122,6 +122,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
case Stage::Vertex: {
|
case Stage::Vertex: {
|
||||||
BuildCommon(regs.vs_program);
|
BuildCommon(regs.vs_program);
|
||||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||||
|
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
|
||||||
|
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
|
||||||
info.vs_info.emulate_depth_negative_one_to_one =
|
info.vs_info.emulate_depth_negative_one_to_one =
|
||||||
!instance.IsDepthClipControlSupported() &&
|
!instance.IsDepthClipControlSupported() &&
|
||||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||||
@@ -460,10 +462,6 @@ bool PipelineCache::RefreshGraphicsKey() {
         // Stride will still be handled outside the pipeline using dynamic state.
         u32 vertex_binding = 0;
         for (const auto& attrib : fetch_shader->attributes) {
-            if (attrib.UsesStepRates()) {
-                // Skip attribute binding as the data will be pulled by shader.
-                continue;
-            }
             const auto& buffer = attrib.GetSharp(*vs_info);
             ASSERT(vertex_binding < MaxVertexBufferCount);
             key.vertex_buffer_formats[vertex_binding++] =
@@ -20,12 +20,9 @@
 namespace Vulkan {
 
 static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
-    Shader::PushData push_data{};
-    push_data.step0 = regs.vgt_instance_step_rate_0;
-    push_data.step1 = regs.vgt_instance_step_rate_1;
-
     // TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
     // is encountered and implemented in the recompiler.
+    Shader::PushData push_data{};
     push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
     push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
     push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;