shader_recompiler: Tessellation WIP

IndecisiveTurtle 2024-10-08 23:24:59 +03:00 committed by Frodo Baggins
parent 715ac8a279
commit c9f0771c0f
38 changed files with 807 additions and 102 deletions

View File

@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/opcodes.cpp
src/shader_recompiler/ir/opcodes.h
src/shader_recompiler/ir/opcodes.inc
src/shader_recompiler/ir/patch.cpp
src/shader_recompiler/ir/patch.h
src/shader_recompiler/ir/post_order.cpp
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <span>
#include <type_traits>
#include <utility>
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
return arg.VectorReg();
} else if constexpr (std::is_same_v<ArgType, const char*>) {
return arg.StringLiteral();
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
return arg.Patch();
}
UNREACHABLE();
}
template <auto func, bool is_first_arg_inst, size_t... I>
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
return main;
}
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
switch (primitive) {
case AmdGpu::TessellationType::Isoline:
return spv::ExecutionMode::Isolines;
case AmdGpu::TessellationType::Triangle:
return spv::ExecutionMode::Triangles;
case AmdGpu::TessellationType::Quad:
return spv::ExecutionMode::Quads;
}
UNREACHABLE_MSG("Tessellation primitive {}", primitive);
}
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
switch (spacing) {
case AmdGpu::TessellationPartitioning::Integer:
return spv::ExecutionMode::SpacingEqual;
case AmdGpu::TessellationPartitioning::FracOdd:
return spv::ExecutionMode::SpacingFractionalOdd;
case AmdGpu::TessellationPartitioning::FracEven:
return spv::ExecutionMode::SpacingFractionalEven;
default:
break;
}
UNREACHABLE_MSG("Tessellation spacing {}", spacing);
}
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
@ -244,11 +273,12 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.uses_group_ballot) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
}
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
const auto stage = info.l_stage;
if (stage == LogicalStage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if (info.stage == Stage::Geometry) {
if (stage == LogicalStage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry);
}
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
@ -257,23 +287,39 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
}
}
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const auto& info = program.info;
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.info.stage) {
case Stage::Compute: {
switch (info.l_stage) {
case LogicalStage::Compute: {
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
execution_model = spv::ExecutionModel::GLCompute;
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
workgroup_size[1], workgroup_size[2]);
break;
}
case Stage::Export:
case Stage::Vertex:
case LogicalStage::Vertex:
execution_model = spv::ExecutionModel::Vertex;
break;
case Stage::Fragment:
case LogicalStage::TessellationControl:
execution_model = spv::ExecutionModel::TessellationControl;
ctx.AddCapability(spv::Capability::Tessellation);
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.hs_info.output_control_points);
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
ctx.AddCapability(spv::Capability::Tessellation);
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
ctx.AddExecutionMode(main,
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
? spv::ExecutionMode::VertexOrderCcw
: spv::ExecutionMode::VertexOrderCw);
break;
}
case LogicalStage::Fragment:
execution_model = spv::ExecutionModel::Fragment;
if (ctx.profile.lower_left_origin_mode) {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
@ -288,7 +334,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
execution_model = spv::ExecutionModel::Geometry;
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
ctx.AddExecutionMode(main,
@ -299,7 +345,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.runtime_info.gs_info.num_invocations);
break;
default:
throw NotImplementedException("Stage {}", u32(program.info.stage));
UNREACHABLE_MSG("Stage {}", u32(info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -345,7 +391,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
const IR::Program& program, Bindings& binding) {
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
DefineEntryPoint(program.info, ctx, main);
SetupCapabilities(program.info, profile, ctx);
SetupFloatMode(ctx, profile, runtime_info, main);
PatchPhiNodes(program, ctx);

View File

@ -4,6 +4,7 @@
#include "common/assert.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/patch.h"
#include <magic_enum/magic_enum.hpp>
@ -242,8 +243,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
}
return coord;
}
case IR::Attribute::TessellationEvaluationPointU:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
case IR::Attribute::TessellationEvaluationPointV:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
default:
throw NotImplementedException("Read attribute {}", attr);
UNREACHABLE_MSG("Read attribute {}", attr);
}
}
@ -269,7 +276,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
ASSERT(ctx.info.stage == Stage::Geometry);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
default:
throw NotImplementedException("Read U32 attribute {}", attr);
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}
}
@ -287,6 +294,42 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
}
}
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
const Id type{ctx.stage == Stage::Hull ? ctx.output_f32 : ctx.input_f32};
const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
return ctx.OpLoad(ctx.F32[1], pointer);
}
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
const Id pointer{[&] {
if (IR::IsGeneric(patch)) {
const u32 index{IR::GenericPatchIndex(patch)};
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
}
switch (patch) {
case IR::Patch::TessellationLodLeft:
case IR::Patch::TessellationLodRight:
case IR::Patch::TessellationLodTop:
case IR::Patch::TessellationLodBottom: {
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
const Id index_id{ctx.ConstU32(index)};
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
}
case IR::Patch::TessellationLodInteriorU:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
ctx.u32_zero_value);
case IR::Patch::TessellationLodInteriorV:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
default:
UNREACHABLE_MSG("Patch {}", u32(patch));
}
}()};
ctx.OpStore(pointer, value);
}
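For reference, the tessellation LOD patches above are routed to the TessLevelOuter/TessLevelInner builtin arrays purely by enum order. A minimal standalone sketch of that index arithmetic; the enum below only mirrors the order declared in shader_recompiler/ir/patch.h and nothing here is code from the commit itself:

#include <cstdint>

// Mirror of the first Patch enumerators, in the order declared in patch.h.
enum class Patch : uint64_t {
    TessellationLodLeft,
    TessellationLodTop,
    TessellationLodRight,
    TessellationLodBottom,
    TessellationLodInteriorU,
    TessellationLodInteriorV,
};

// EmitSetPatch indexes TessLevelOuter with (patch - TessellationLodLeft).
constexpr uint32_t OuterIndex(Patch p) {
    return static_cast<uint32_t>(p) - static_cast<uint32_t>(Patch::TessellationLodLeft);
}

static_assert(OuterIndex(Patch::TessellationLodLeft) == 0);
static_assert(OuterIndex(Patch::TessellationLodTop) == 1);
static_assert(OuterIndex(Patch::TessellationLodRight) == 2);
static_assert(OuterIndex(Patch::TessellationLodBottom) == 3);
// InteriorU and InteriorV are routed to TessLevelInner[0] and [1] explicitly.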
template <u32 N>
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];

View File

@ -9,6 +9,7 @@
namespace Shader::IR {
enum class Attribute : u64;
enum class ScalarReg : u32;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR
@ -88,6 +89,8 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);

View File

@ -34,7 +34,7 @@ std::string_view StageName(Stage stage) {
case Stage::Compute:
return "cs";
}
throw InvalidArgument("Invalid stage {}", u32(stage));
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
}
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
@ -65,7 +65,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
const Info& info_, Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, binding{binding_} {
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@ -268,9 +268,8 @@ void EmitContext::DefineInputs() {
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
@ -311,12 +310,11 @@ void EmitContext::DefineInputs() {
}
input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
case LogicalStage::Fragment:
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@ -351,15 +349,14 @@ void EmitContext::DefineInputs() {
}
input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
interfaces.push_back(attr_id);
}
break;
case Stage::Compute:
case LogicalStage::Compute:
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
local_invocation_id =
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const auto gl_per_vertex =
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
@ -389,15 +386,18 @@ void EmitContext::DefineInputs() {
}
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
break;
}
default:
break;
}
}
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Export:
case Stage::Vertex: {
switch (l_stage) {
case LogicalStage::Vertex: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
@ -419,11 +419,33 @@ void EmitContext::DefineOutputs() {
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
interfaces.push_back(id);
}
break;
}
case Stage::Fragment:
case LogicalStage::TessellationControl: {
if (info.stores_tess_level_outer) {
const Id type{TypeArray(F32[1], ConstU32(4U))};
output_tess_level_outer =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
Decorate(output_tess_level_outer, spv::Decoration::Patch);
}
if (info.stores_tess_level_inner) {
const Id type{TypeArray(F32[1], ConstU32(2U))};
output_tess_level_inner =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineOutput(F32[4], index)};
Decorate(id, spv::Decoration::Patch);
patches[index] = id;
}
break;
}
case LogicalStage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@ -435,22 +457,22 @@ void EmitContext::DefineOutputs() {
const Id id{DefineOutput(type, i)};
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
interfaces.push_back(id);
}
break;
case Stage::Geometry: {
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u};
interfaces.push_back(id);
}
break;
}
default:
case LogicalStage::Compute:
break;
default:
UNREACHABLE();
}
}

View File

@ -46,14 +46,18 @@ public:
void DefineBufferOffsets();
void DefineInterpolatedAttribs();
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
Decorate(input_id, spv::Decoration::Location, location);
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
if (location) {
Decorate(input_id, spv::Decoration::Location, *location);
}
return input_id;
}
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
std::optional<spv::BuiltIn> builtin = std::nullopt) {
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
if (location) {
Decorate(output_id, spv::Decoration::Location, *location);
}
@ -131,7 +135,8 @@ public:
const Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage{};
Stage stage;
LogicalStage l_stage{};
Id void_id{};
Id U8{};
@ -188,6 +193,11 @@ public:
Id clip_distances{};
Id cull_distances{};
Id output_tess_level_outer{};
Id output_tess_level_inner{};
Id tess_coord;
std::array<Id, 30> patches{};
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_local_invocation_id{};

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"

View File

@ -34,9 +34,8 @@ void Translator::EmitPrologue() {
}
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (info.stage) {
case Stage::Vertex:
case Stage::Export:
switch (info.l_stage) {
case LogicalStage::Vertex:
// v0: vertex ID, always present
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
// v1: instance ID, step rate 0
@ -122,7 +121,17 @@ void Translator::EmitPrologue() {
}
}
break;
case Stage::Compute:
case LogicalStage::TessellationControl:
ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
case LogicalStage::TessellationEval:
ir.SetVectorReg(IR::VectorReg::V0,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
case LogicalStage::Compute:
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
@ -137,7 +146,7 @@ void Translator::EmitPrologue() {
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
}
break;
case Stage::Geometry:
case LogicalStage::Geometry:
switch (runtime_info.gs_info.out_primitive[0]) {
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
@ -152,7 +161,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
default:
throw NotImplementedException("Unknown shader stage");
UNREACHABLE_MSG("Unknown shader stage");
}
}
@ -503,7 +512,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
info.stage == Stage::Local);
translator.EmitFetch(inst);
continue;
}

View File

@ -217,7 +217,7 @@ public:
// VOP3a
void V_MAD_F32(const GcnInst& inst);
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
void V_MAD_U32_U24(const GcnInst& inst);
void V_CUBEID_F32(const GcnInst& inst);
void V_CUBESC_F32(const GcnInst& inst);

View File

@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
IR::U32 src1{GetSrc(inst.src[1])};
IR::U32 src2{GetSrc(inst.src[2])};
if (!src1.IsImmediate()) {
src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
}
if (!src2.IsImmediate()) {
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
}
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
}
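Leaving immediate src1/src2 unmasked keeps the bit offset and width visible as literal operands of BitFieldExtract for the hull shader transform, which runs before constant propagation. A standalone sketch of the V_BFE_U32 semantics for a quick mental check; the packed value and the 8-bit field widths below are illustrative assumptions, not taken from a real shader:

#include <cstdint>

// V_BFE_U32: result = (src0 >> src1[4:0]) & ((1 << src2[4:0]) - 1).
constexpr uint32_t Bfe(uint32_t value, uint32_t offset, uint32_t width) {
    offset &= 0x1F;
    width &= 0x1F;
    return width == 0 ? 0u : (value >> offset) & ((1u << width) - 1u);
}

// If the driver packs patch_id in the low byte of VGPR1 and the output
// control point id in the byte above it (assumed widths), they unpack as:
static_assert(Bfe(0x00000302u, 0, 8) == 0x02); // patch_id (bit_pos 0)
static_assert(Bfe(0x00000302u, 8, 8) == 0x03); // output_control_point_id (bit_pos 8)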

View File

@ -187,7 +187,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
buffer_info.globally_coherent.Assign(mtbuf.glc);
buffer_info.system_coherent.Assign(mtbuf.slc);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@ -245,7 +246,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::ScalarReg sharp{inst.src[2].code * 4};
const IR::Value soffset{GetSrc(inst.src[3])};
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
"Non immediate offset not supported");
}
@ -267,7 +268,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
buffer_info.index_enable.Assign(mtbuf.idxen);
buffer_info.offset_enable.Assign(mtbuf.offen);
buffer_info.inst_offset.Assign(mtbuf.offset);
buffer_info.ring_access.Assign(is_ring);
buffer_info.globally_coherent.Assign(mtbuf.glc);
buffer_info.system_coherent.Assign(mtbuf.slc);
if (is_typed) {
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);

View File

@ -163,6 +163,7 @@ struct Info {
UserDataMask ud_mask{};
CopyShaderData gs_copy_data;
u32 uses_patches{};
BufferResourceList buffers;
TextureBufferResourceList texture_buffers;
@ -175,6 +176,7 @@ struct Info {
std::span<const u32> user_data;
Stage stage;
LogicalStage l_stage;
u64 pgm_hash{};
VAddr pgm_base;
@ -190,14 +192,16 @@ struct Info {
bool uses_shared{};
bool uses_fp16{};
bool uses_fp64{};
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_readconst{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
user_data{params.user_data} {}
template <typename T>

View File

@ -114,6 +114,10 @@ std::string NameOf(Attribute attribute) {
return "LocalInvocationId";
case Attribute::LocalInvocationIndex:
return "LocalInvocationIndex";
case Attribute::TessellationEvaluationPointU:
return "TessellationEvaluationPointU";
case Attribute::TessellationEvaluationPointV:
return "TessellationEvaluationPointV";
default:
break;
}

View File

@ -74,6 +74,8 @@ enum class Attribute : u64 {
FragCoord = 77,
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
TessellationEvaluationPointU = 80,
TessellationEvaluationPointV = 81,
Max,
};

View File

@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
return fmt::format("{}", arg.VectorReg());
case Type::Attribute:
return fmt::format("{}", arg.Attribute());
case Type::Patch:
return fmt::format("{}", arg.Patch());
default:
return "<unknown immediate type>";
}

View File

@ -278,6 +278,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
}
F32 IREmitter::GetPatch(Patch patch) {
return Inst<F32>(Opcode::GetPatch, patch);
}
void IREmitter::SetPatch(Patch patch, const F32& value) {
Inst(Opcode::SetPatch, patch, value);
}
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) {
case 32:

View File

@ -11,6 +11,7 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/ir/patch.h"
namespace Shader::IR {
@ -84,6 +85,9 @@ public:
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] F32 GetPatch(Patch patch);
void SetPatch(Patch patch, const F32& value);
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);

View File

@ -52,6 +52,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::Discard:
case Opcode::DiscardCond:
case Opcode::SetAttribute:
case Opcode::SetPatch:
case Opcode::StoreBufferU32:
case Opcode::StoreBufferU32x2:
case Opcode::StoreBufferU32x3:

View File

@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
constexpr Type ScalarReg{Type::ScalarReg};
constexpr Type VectorReg{Type::VectorReg};
constexpr Type Attribute{Type::Attribute};
constexpr Type SystemValue{Type::SystemValue};
constexpr Type Patch{Type::Patch};
constexpr Type U1{Type::U1};
constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};

View File

@ -60,6 +60,8 @@ OPCODE(SetGotoVariable, Void, U32,
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetPatch, F32, Patch, )
OPCODE(SetPatch, Void, Patch, F32, )
// Flags
OPCODE(GetScc, U1, Void, )

View File

@ -0,0 +1,157 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
/**
* Tessellation shaders pass outputs to the next shader using LDS.
* The Hull shader stage receives input control points stored in LDS.
*
* The LDS layout is:
* - TCS inputs for patch 0
* - TCS inputs for patch 1
* - TCS inputs for patch 2
* - ...
* - TCS outputs for patch 0
* - TCS outputs for patch 1
* - TCS outputs for patch 2
* - ...
* - Per-patch TCS outputs for patch 0
* - Per-patch TCS outputs for patch 1
* - Per-patch TCS outputs for patch 2
*
* If the Hull stage does not write any new control points, the driver will
* optimize the LDS layout so that the input and output control point spaces overlap.
*
* Tessellation factors are stored in the per-patch TCS output block,
* as well as in a factor V# that is automatically bound by the driver.
*
* This pass attempts to resolve LDS accesses into attribute accesses and to correctly
* write to the tessellation factor tables. For the latter we replace the
* buffer store instructions with factor writes according to their offset.
*
* LDS stores can either be output control point writes or per-patch data writes.
* This is detected by looking at how the address is formed. In any case the calculation
* will be of the form a * b + c. For output control points a = output_control_point_id
* while for per-patch writes a = patch_id.
*
* Both patch_id and output_control_point_id are packed into VGPR1 by the driver, and the
* shader uses V_BFE_U32 to extract them. We use the starting bit_pos to determine which is which.
*
* LDS reads are trickier, as the number of different calculations performed can vary.
* The final result, if output control point space is distinct, is of the form:
* patch_id * input_control_point_stride * num_control_points_per_input_patch + a
* The value "a" can be anything in the range of [0, input_control_point_stride]
*
* This pass does not attempt to deduce the exact attribute referenced by "a" but rather
* uses "a" itself to index into the input attributes. Those are defined as an array in the shader:
* layout (location = 0) in vec4[num_control_points_per_input_patch] attr[];
* ...
* float value = attr[a / in_stride][(a % in_stride) >> 4][(a & 0xF) >> 2];
*
* This requires knowing in_stride, which is not provided to us by the guest.
* To deduce it we perform a breadth-first search on the arguments of a DS_READ*,
* looking for a buffer load with offset = 0. This will be the buffer holding the tessellation
* constants, and it contains the value of in_stride that we can read at compile time.
*
* NOTE: This pass must be run before constant propagation as it relies on relatively specific
* pattern matching that might be mutated by that optimization pass.
*
*/
void HullShaderTransform(const IR::Program& program) {
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4: {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.globally_coherent) {
break;
}
const auto GetValue = [&](IR::Value data) -> IR::F32 {
if (auto* inst = data.TryInstRecursive();
inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
return IR::F32{inst->Arg(0)};
}
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
};
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
const auto factor_idx = info.inst_offset.Value() >> 2;
const IR::Value data = inst.Arg(2);
inst.Invalidate();
if (num_dwords == 1) {
ir.SetPatch(IR::PatchFactor(factor_idx), GetValue(data));
break;
}
auto* inst = data.TryInstRecursive();
ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
for (s32 i = 0; i < num_dwords; i++) {
ir.SetPatch(IR::PatchFactor(factor_idx + i), GetValue(inst->Arg(i)));
}
break;
}
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64: {
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
const IR::Value data = inst.Arg(1);
const auto [data_lo, data_hi] = [&] -> std::pair<IR::U32, IR::U32> {
if (num_dwords == 1) {
return {IR::U32{data}, IR::U32{}};
}
const auto* prod = data.InstRecursive();
return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}};
}();
const IR::Inst* ds_offset = inst.Arg(0).InstRecursive();
const u32 offset_dw = ds_offset->Arg(1).U32() >> 4;
IR::Inst* prod = ds_offset->Arg(0).TryInstRecursive();
ASSERT(prod && (prod->GetOpcode() == IR::Opcode::IAdd32 ||
prod->GetOpcode() == IR::Opcode::IMul32));
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::IMul32);
}
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldSExtract &&
prod->Arg(2).IsImmediate() && prod->Arg(2).U32() == 24);
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldUExtract);
const u32 bit_pos = prod->Arg(1).U32();
const auto SetOutput = [&ir](IR::U32 value, u32 offset_dw, bool is_patch_const) {
const IR::F32 data = ir.BitCast<IR::F32, IR::U32>(value);
if (!is_patch_const) {
const u32 param = offset_dw >> 2;
const u32 comp = offset_dw & 3;
ir.SetAttribute(IR::Attribute::Param0 + param, data, comp);
} else {
ir.SetPatch(IR::PatchGeneric(offset_dw), data);
}
};
ASSERT_MSG(bit_pos == 0 || bit_pos == 8, "Unknown bit extract pos {}", bit_pos);
const bool is_patch_const = bit_pos == 0;
SetOutput(data_lo, offset_dw, is_patch_const);
if (num_dwords > 1) {
SetOutput(data_hi, offset_dw + 1, is_patch_const);
}
inst.Invalidate();
break;
}
default:
break;
}
}
}
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
}
} // namespace Shader::Optimization
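A worked instance of the attr[] indexing formula from the comment above, to make the byte arithmetic concrete. in_stride is the per-control-point input stride in bytes read from the tessellation constants buffer; the stride of 64 and the offset of 84 below are assumptions purely for illustration:

#include <cstdint>

// Decompose a byte offset "a" within one input patch into the indices
// attr[cp][vec4][component] used by the comment's formula.
struct AttrIndex {
    uint32_t control_point;
    uint32_t attribute; // which vec4 of the control point
    uint32_t component; // which float inside that vec4
};

constexpr AttrIndex Resolve(uint32_t a, uint32_t in_stride) {
    return {
        a / in_stride,        // control point within the input patch
        (a % in_stride) >> 4, // 16 bytes per vec4 attribute
        (a & 0xF) >> 2,       // 4 bytes per float component
    };
}

// Assumed layout: in_stride = 64 bytes, i.e. 4 vec4 attributes per control point.
constexpr AttrIndex example = Resolve(84, 64);
static_assert(example.control_point == 1); // second control point of the patch
static_assert(example.attribute == 1);     // its second vec4 attribute
static_assert(example.component == 1);     // component .y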

View File

@ -18,5 +18,6 @@ void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
void HullShaderTransform(const IR::Program& program);
} // namespace Shader::Optimization

View File

@ -23,12 +23,43 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
};
switch (stage) {
case Stage::Local: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::WriteSharedU64: {
u32 offset = 0;
const auto* addr = inst.Arg(0).InstRecursive();
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
const IR::Inst* pair = inst.Arg(1).InstRecursive();
for (s32 i = 0; i < 2; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4;
const IR::U32 value = IR::U32{pair->Arg(i)};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4;
}
inst.Invalidate();
break;
}
case IR::Opcode::WriteSharedU32:
UNREACHABLE();
default:
break;
}
});
break;
}
case Stage::Export: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
break;
}
@ -61,7 +92,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::LoadBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
break;
}
@ -80,7 +112,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
break;
}
case IR::Opcode::StoreBufferU32: {
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
break;
}
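The Stage::Local case above turns LS shared-memory writes into output attribute stores, with each vec4 parameter occupying 16 bytes. A quick standalone check of that byte-offset arithmetic; the offsets are made up:

#include <cstdint>
#include <utility>

// For a WriteShared at byte offset N, the value lands in Param(N / 16),
// component (N / 4) % 4; a 64-bit write fills two consecutive components.
constexpr std::pair<uint32_t, uint32_t> ParamAndComp(uint32_t byte_offset) {
    return {byte_offset / 16, (byte_offset / 4) % 4};
}

static_assert(ParamAndComp(36) == std::pair<uint32_t, uint32_t>(2u, 1u)); // Param2.y
static_assert(ParamAndComp(40) == std::pair<uint32_t, uint32_t>(2u, 2u)); // Param2.z, the high dword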

View File

@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::GetUserData:
info.ud_mask.Set(inst.Arg(0).ScalarReg());
break;
case IR::Opcode::SetPatch: {
const auto patch = inst.Arg(0).Patch();
if (patch <= IR::Patch::TessellationLodBottom) {
info.stores_tess_level_outer = true;
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
info.stores_tess_level_inner = true;
} else {
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
}
break;
}
case IR::Opcode::GetPatch: {
const auto patch = inst.Arg(0).Patch();
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
break;
}
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU32:

View File

@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/patch.h"
namespace Shader::IR {
std::string NameOf(Patch patch) {
switch (patch) {
case Patch::TessellationLodLeft:
return "TessellationLodLeft";
case Patch::TessellationLodTop:
return "TessellationLodTop";
case Patch::TessellationLodRight:
return "TessellationLodRight";
case Patch::TessellationLodBottom:
return "TessellationLodBottom";
case Patch::TessellationLodInteriorU:
return "TessellationLodInteriorU";
case Patch::TessellationLodInteriorV:
return "TessellationLodInteriorV";
default:
const u32 index = u32(patch) - u32(Patch::Component0);
return fmt::format("Component{}", index);
}
}
} // namespace Shader::IR

View File

@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <fmt/format.h>
#include "common/types.h"
namespace Shader::IR {
enum class Patch : u64 {
TessellationLodLeft,
TessellationLodTop,
TessellationLodRight,
TessellationLodBottom,
TessellationLodInteriorU,
TessellationLodInteriorV,
Component0,
Component1,
Component2,
Component3,
Component4,
Component5,
Component6,
Component7,
Component8,
Component9,
Component10,
Component11,
Component12,
Component13,
Component14,
Component15,
Component16,
Component17,
Component18,
Component19,
Component20,
Component21,
Component22,
Component23,
Component24,
Component25,
Component26,
Component27,
Component28,
Component29,
Component30,
Component31,
Component32,
Component33,
Component34,
Component35,
Component36,
Component37,
Component38,
Component39,
Component40,
Component41,
Component42,
Component43,
Component44,
Component45,
Component46,
Component47,
Component48,
Component49,
Component50,
Component51,
Component52,
Component53,
Component54,
Component55,
Component56,
Component57,
Component58,
Component59,
Component60,
Component61,
Component62,
Component63,
Component64,
Component65,
Component66,
Component67,
Component68,
Component69,
Component70,
Component71,
Component72,
Component73,
Component74,
Component75,
Component76,
Component77,
Component78,
Component79,
Component80,
Component81,
Component82,
Component83,
Component84,
Component85,
Component86,
Component87,
Component88,
Component89,
Component90,
Component91,
Component92,
Component93,
Component94,
Component95,
Component96,
Component97,
Component98,
Component99,
Component100,
Component101,
Component102,
Component103,
Component104,
Component105,
Component106,
Component107,
Component108,
Component109,
Component110,
Component111,
Component112,
Component113,
Component114,
Component115,
Component116,
Component117,
Component118,
Component119,
};
static_assert(static_cast<u64>(Patch::Component119) == 125);
constexpr bool IsGeneric(Patch patch) noexcept {
return patch >= Patch::Component0 && patch <= Patch::Component119;
}
constexpr Patch PatchFactor(u32 index) {
return static_cast<Patch>(index);
}
constexpr Patch PatchGeneric(u32 index) {
return static_cast<Patch>(static_cast<u32>(Patch::Component0) + index);
}
constexpr u32 GenericPatchIndex(Patch patch) {
return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
}
constexpr u32 GenericPatchElement(Patch patch) {
return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
}
[[nodiscard]] std::string NameOf(Patch patch);
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::Patch> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(const Shader::IR::Patch patch, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
}
};
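A couple of compile-time checks of the generic patch index math above; they would hold in any translation unit that includes this header:

#include "shader_recompiler/ir/patch.h"

using namespace Shader::IR;

static_assert(GenericPatchIndex(Patch::Component6) == 1);    // second per-patch vec4 output
static_assert(GenericPatchElement(Patch::Component6) == 2);  // its .z component
static_assert(GenericPatchIndex(Patch::Component119) == 29); // last of the 30 patch slots in the backend
static_assert(GenericPatchElement(Patch::Component119) == 3);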

View File

@ -49,7 +49,8 @@ union BufferInstInfo {
BitField<0, 1, u32> index_enable;
BitField<1, 1, u32> offset_enable;
BitField<2, 12, u32> inst_offset;
BitField<14, 1, u32> ring_access; // global + system coherency
BitField<14, 1, u32> system_coherent;
BitField<15, 1, u32> globally_coherent;
};
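With the dedicated ring_access bit gone, RingAccessElimination now treats a buffer access as a ring access when both coherence bits are set; the translator fills them from the MTBUF glc/slc flags. A hypothetical helper expressing that check, not part of this commit:

// Assumes the BufferInstInfo definition above; the helper name is made up.
inline bool IsRingAccess(BufferInstInfo info) {
    return info.system_coherent && info.globally_coherent;
}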
enum class ScalarReg : u32 {

View File

@ -15,7 +15,7 @@ enum class Type {
ScalarReg = 1 << 1,
VectorReg = 1 << 2,
Attribute = 1 << 3,
SystemValue = 1 << 4,
Patch = 1 << 4,
U1 = 1 << 5,
U8 = 1 << 6,
U16 = 1 << 7,

View File

@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}

View File

@ -18,6 +18,7 @@
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/patch.h"
namespace Shader::IR {
@ -34,6 +35,7 @@ public:
explicit Value(IR::ScalarReg reg) noexcept;
explicit Value(IR::VectorReg reg) noexcept;
explicit Value(IR::Attribute value) noexcept;
explicit Value(IR::Patch patch) noexcept;
explicit Value(bool value) noexcept;
explicit Value(u8 value) noexcept;
explicit Value(u16 value) noexcept;
@ -56,6 +58,7 @@ public:
[[nodiscard]] IR::ScalarReg ScalarReg() const;
[[nodiscard]] IR::VectorReg VectorReg() const;
[[nodiscard]] IR::Attribute Attribute() const;
[[nodiscard]] IR::Patch Patch() const;
[[nodiscard]] bool U1() const;
[[nodiscard]] u8 U8() const;
[[nodiscard]] u16 U16() const;
@ -75,6 +78,7 @@ private:
IR::ScalarReg sreg;
IR::VectorReg vreg;
IR::Attribute attribute;
IR::Patch patch;
bool imm_u1;
u8 imm_u8;
u16 imm_u16;
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
return attribute;
}
inline IR::Patch Value::Patch() const {
DEBUG_ASSERT(type == Type::Patch);
return patch;
}
inline bool Value::U1() const {
if (IsIdentity()) {
return inst->Arg(0).U1();

View File

@ -60,9 +60,14 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
// Run optimization passes
const auto stage = program.info.stage;
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
if (stage == Stage::Hull) {
Shader::Optimization::HullShaderTransform(program);
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
if (program.info.stage != Stage::Compute) {
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);

View File

@ -23,6 +23,15 @@ enum class Stage : u32 {
};
constexpr u32 MaxStageTypes = 7;
enum class LogicalStage : u32 {
Fragment,
Vertex,
TessellationControl,
TessellationEval,
Geometry,
Compute,
};
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
return static_cast<Stage>(index);
}
@ -64,12 +73,23 @@ struct VertexRuntimeInfo {
u32 num_outputs;
std::array<VsOutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
bool operator==(const VertexRuntimeInfo& other) const noexcept {
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
tess_type == other.tess_type && tess_topology == other.tess_topology &&
tess_partitioning == other.tess_partitioning;
}
};
struct HullRuntimeInfo {
u32 output_control_points;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
};
static constexpr auto GsMaxOutputStreams = 4u;
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
struct GeometryRuntimeInfo {
@ -152,6 +172,7 @@ struct RuntimeInfo {
union {
ExportRuntimeInfo es_info;
VertexRuntimeInfo vs_info;
HullRuntimeInfo hs_info;
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
@ -174,6 +195,8 @@ struct RuntimeInfo {
return es_info == other.es_info;
case Stage::Geometry:
return gs_info == other.gs_info;
case Stage::Hull:
return hs_info == other.hs_info;
default:
return true;
}

View File

@ -956,6 +956,7 @@ struct Liverpool {
enum VgtStages : u32 {
Vs = 0u, // always enabled
EsGs = 0xB0u,
LsHs = 0x45u,
};
VgtStages raw;
@ -1059,6 +1060,20 @@ struct Liverpool {
};
};
union LsHsConfig {
u32 raw;
BitField<0, 8, u32> num_patches;
BitField<8, 6, u32> hs_input_control_points;
BitField<14, 6, u32> hs_output_control_points;
};
union TessellationConfig {
u32 raw;
BitField<0, 2, TessellationType> type;
BitField<2, 3, TessellationPartitioning> partitioning;
BitField<5, 3, TessellationTopology> topology;
};
union Eqaa {
u32 raw;
BitField<0, 1, u32> max_anchor_samples;
@ -1200,9 +1215,10 @@ struct Liverpool {
BitField<0, 11, u32> vgt_gs_max_vert_out;
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
ShaderStageEnable stage_enable;
INSERT_PADDING_WORDS(1);
LsHsConfig ls_hs_config;
u32 vgt_gs_vert_itemsize[4];
INSERT_PADDING_WORDS(4);
TessellationConfig tess_config;
INSERT_PADDING_WORDS(3);
PolygonOffset poly_offset;
GsInstances vgt_gs_instance_cnt;
StreamOutConfig vgt_strmout_config;
@ -1445,6 +1461,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);

View File

@ -21,6 +21,26 @@ enum class FpDenormMode : u32 {
InOutAllow = 3,
};
enum class TessellationType : u32 {
Isoline = 0,
Triangle = 1,
Quad = 2,
};
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
FracOdd = 2,
FracEven = 3,
};
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
TriangleCw = 2,
TriangleCcw = 3,
};
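These enum values line up with the TessellationConfig bit layout added to liverpool.h above (type in bits [0,2), partitioning in bits [2,5), topology in bits [5,8)). A standalone decode of an illustrative, made-up raw register value:

#include <cstdint>

constexpr uint32_t TessType(uint32_t raw)         { return raw & 0x3; }        // bits [0,2)
constexpr uint32_t TessPartitioning(uint32_t raw) { return (raw >> 2) & 0x7; } // bits [2,5)
constexpr uint32_t TessTopology(uint32_t raw)     { return (raw >> 5) & 0x7; } // bits [5,8)

// 0x4A encodes: type = 2 (Quad), partitioning = 2 (FracOdd), topology = 2 (TriangleCw).
static_assert(TessType(0x4A) == 2);
static_assert(TessPartitioning(0x4A) == 2);
static_assert(TessTopology(0x4A) == 2);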
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
None = 0,

View File

@ -52,7 +52,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
const auto& vs_info = GetStage(Shader::Stage::Vertex);
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader
@ -204,7 +204,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
shader_stages;
auto stage = u32(Shader::Stage::Vertex);
auto stage = u32(LogicalStage::Vertex);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eVertex,
@ -212,7 +212,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pName = "main",
});
}
stage = u32(Shader::Stage::Geometry);
stage = u32(LogicalStage::Geometry);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eGeometry,
@ -220,7 +220,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pName = "main",
});
}
stage = u32(Shader::Stage::Fragment);
stage = u32(LogicalStage::TessellationControl);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationControl,
.module = modules[stage],
.pName = "main",
});
}
stage = u32(LogicalStage::TessellationEval);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
.module = modules[stage],
.pName = "main",
});
}
stage = u32(LogicalStage::Fragment);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eFragment,

View File

@ -73,7 +73,7 @@ public:
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
}
auto GetWriteMasks() const {

View File

@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
namespace Vulkan {
using Shader::LogicalStage;
using Shader::Stage;
using Shader::VsOutput;
constexpr static std::array DescriptorHeapSizes = {
@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
auto info = Shader::RuntimeInfo{stage};
const auto& regs = liverpool->regs;
const auto BuildCommon = [&](const auto& program) {
@ -89,20 +91,34 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
info.fp_round_mode32 = program.settings.fp_round_mode32;
};
switch (stage) {
case Shader::Stage::Export: {
case Stage::Local: {
BuildCommon(regs.ls_program);
break;
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
break;
}
case Stage::Export: {
BuildCommon(regs.es_program);
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
break;
}
case Shader::Stage::Vertex: {
case Stage::Vertex: {
BuildCommon(regs.vs_program);
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
if (l_stage == LogicalStage::TessellationEval) {
info.vs_info.tess_type = regs.tess_config.type;
info.vs_info.tess_topology = regs.tess_config.topology;
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
}
break;
}
case Shader::Stage::Geometry: {
case Stage::Geometry: {
BuildCommon(regs.gs_program);
auto& gs_info = info.gs_info;
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
@ -121,7 +137,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
break;
}
case Shader::Stage::Fragment: {
case Stage::Fragment: {
BuildCommon(regs.ps_program);
info.fs_info.en_flags = regs.ps_input_ena;
info.fs_info.addr_flags = regs.ps_input_addr;
@ -143,7 +159,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
}
break;
}
case Shader::Stage::Compute: {
case Stage::Compute: {
const auto& cs_pgm = regs.cs_program;
info.num_user_data = cs_pgm.settings.num_user_regs;
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
@ -304,7 +320,7 @@ bool PipelineCache::RefreshGraphicsKey() {
fetch_shader = std::nullopt;
Shader::Backend::Bindings binding{};
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
const auto stage_in_idx = static_cast<u32>(stage_in);
const auto stage_out_idx = static_cast<u32>(stage_out);
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
@ -331,23 +347,23 @@ bool PipelineCache::RefreshGraphicsKey() {
auto params = Liverpool::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
key.stage_hashes[stage_out_idx]) =
GetProgram(stage_in, stage_out, params, binding);
if (fetch_shader_) {
fetch_shader = fetch_shader_;
}
return true;
};
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
const auto& IsGsFeaturesSupported = [&]() -> bool {
// These checks are temporary until all functionality is implemented.
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
};
TryBindStage(Shader::Stage::Fragment);
infos.fill(nullptr);
TryBindStage(Stage::Fragment, LogicalStage::Fragment);
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
switch (regs.stage_enable.raw) {
@ -355,22 +371,36 @@ bool PipelineCache::RefreshGraphicsKey() {
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
return false;
}
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
return false;
}
if (!TryBindStage(Shader::Stage::Geometry)) {
if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
return false;
}
break;
}
case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
if (!instance.IsTessellationSupported()) {
break;
}
if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
return false;
}
if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
return false;
}
if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
return false;
}
break;
}
default: {
TryBindStage(Shader::Stage::Vertex);
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
TryBindStage(Stage::Vertex, LogicalStage::Vertex);
break;
}
}
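For reference, the hardware-to-Vulkan stage mapping this switch produces per VGT configuration, condensed into a small table; purely a restatement of the cases above, not new behavior:

#include <array>
#include <string_view>
#include <utility>

using StagePair = std::pair<std::string_view, std::string_view>; // {hardware stage, logical stage}

// LsHs (tessellation) pipelines:
constexpr std::array<StagePair, 3> kLsHsBindings{{
    {"Local", "Vertex"},             // LS program is compiled as the Vulkan vertex shader
    {"Hull", "TessellationControl"}, // HS program becomes the TCS
    {"Vertex", "TessellationEval"},  // the hardware VS runs as the TES
}};

// EsGs (geometry) pipelines:
constexpr std::array<StagePair, 2> kEsGsBindings{{
    {"Export", "Vertex"},
    {"Geometry", "Geometry"},
}};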
const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
u32 vertex_binding = 0;
for (const auto& attrib : fetch_shader->attributes) {
@ -430,7 +460,7 @@ bool PipelineCache::RefreshComputeKey() {
const auto* cs_pgm = &liverpool->regs.cs_program;
const auto cs_params = Liverpool::GetParams(*cs_pgm);
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
GetProgram(Shader::Stage::Compute, cs_params, binding);
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
return true;
}
@ -466,13 +496,13 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
return module;
}
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage);
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage, l_stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
if (new_program) {
it_pgm.value() = std::make_unique<Program>(stage, params);
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
auto& program = it_pgm.value();
auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
@ -491,7 +521,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
if (it == program->modules.end()) {
auto new_info = Shader::Info(stage, params);
auto new_info = Shader::Info(stage, l_stage, params);
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
program->AddPermut(module, std::move(spec));
} else {

View File

@ -34,11 +34,13 @@ struct Program {
vk::ShaderModule module;
Shader::StageSpecialization spec;
};
using ModuleList = boost::container::small_vector<Module, 8>;
Shader::Info info;
boost::container::small_vector<Module, 8> modules;
ModuleList modules;
explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
: info{stage, l_stage, params} {}
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
modules.emplace_back(module, std::move(spec));
@ -55,10 +57,10 @@ public:
const ComputePipeline* GetComputePipeline();
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
u64>
GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding);
using Result = std::tuple<const Shader::Info*, vk::ShaderModule,
std::optional<Shader::Gcn::FetchShaderData>, u64>;
Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage,
Shader::ShaderParams params, Shader::Backend::Bindings& binding);
std::optional<vk::ShaderModule> ReplaceShader(vk::ShaderModule module,
std::span<const u32> spv_code);
@ -74,7 +76,7 @@ private:
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
Shader::Backend::Bindings& binding);
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage);
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
private:
const Instance& instance;